• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK0,X86-FALLBACK0
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK1,X86-FALLBACK1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK2,X86-FALLBACK2
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK3,X86-FALLBACK3
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK4,X86-FALLBACK4
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK0,X64-FALLBACK0
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK1,X64-FALLBACK1
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK2,X64-FALLBACK2
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK3,X64-FALLBACK3
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK4,X64-FALLBACK4
12
13; Patterns:
14;    c) x &  (-1 >> y)
15;    d) x << y >> y
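16; are equivalent. The single-use checks below expect the second variant (a shift pair), with or without BMI2, whenever the value fits in one general-purpose register; 64-bit values on i686 keep the mask form.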
17
18; We do not test the variant where y = (32 - z), because that is BMI2's BZHI.
19
20; ---------------------------------------------------------------------------- ;
21; 8-bit
22; ---------------------------------------------------------------------------- ;
23
; Pattern c on i8, baseline form: the mask is `lshr i8 -1, %numhighbits` and it
; is the first operand of the `and` with %val.
; Expected code on both i686 and x86_64 is a shlb/shrb pair using the same
; count in %cl, i.e. no mask is materialized.
24define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
25; X86-LABEL: clear_highbits8_c0:
26; X86:       # %bb.0:
27; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
28; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
29; X86-NEXT:    shlb %cl, %al
30; X86-NEXT:    shrb %cl, %al
31; X86-NEXT:    retl
32;
33; X64-LABEL: clear_highbits8_c0:
34; X64:       # %bb.0:
35; X64-NEXT:    movl %esi, %ecx
36; X64-NEXT:    shlb %cl, %dil
37; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
38; X64-NEXT:    shrb %cl, %dil
39; X64-NEXT:    movl %edi, %eax
40; X64-NEXT:    retq
41  %mask = lshr i8 -1, %numhighbits
42  %masked = and i8 %mask, %val
43  ret i8 %masked
44}
45
; Pattern c on i8 where the value being masked is loaded from %w instead of
; being passed directly.
; Expected code loads the byte into %al and then applies the shlb/shrb pair on
; both i686 and x86_64.
46define i8 @clear_highbits8_c2_load(i8* %w, i8 %numhighbits) nounwind {
47; X86-LABEL: clear_highbits8_c2_load:
48; X86:       # %bb.0:
49; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
50; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
51; X86-NEXT:    movb (%eax), %al
52; X86-NEXT:    shlb %cl, %al
53; X86-NEXT:    shrb %cl, %al
54; X86-NEXT:    retl
55;
56; X64-LABEL: clear_highbits8_c2_load:
57; X64:       # %bb.0:
58; X64-NEXT:    movl %esi, %ecx
59; X64-NEXT:    movb (%rdi), %al
60; X64-NEXT:    shlb %cl, %al
61; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
62; X64-NEXT:    shrb %cl, %al
63; X64-NEXT:    retq
64  %val = load i8, i8* %w
65  %mask = lshr i8 -1, %numhighbits
66  %masked = and i8 %mask, %val
67  ret i8 %masked
68}
69
; Pattern c on i8 with the `and` operands swapped (%val first, mask second).
; The expected code is the same shlb/shrb pair as in the baseline i8 test, so
; operand order of the `and` is not expected to affect the result.
70define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
71; X86-LABEL: clear_highbits8_c4_commutative:
72; X86:       # %bb.0:
73; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
74; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
75; X86-NEXT:    shlb %cl, %al
76; X86-NEXT:    shrb %cl, %al
77; X86-NEXT:    retl
78;
79; X64-LABEL: clear_highbits8_c4_commutative:
80; X64:       # %bb.0:
81; X64-NEXT:    movl %esi, %ecx
82; X64-NEXT:    shlb %cl, %dil
83; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
84; X64-NEXT:    shrb %cl, %dil
85; X64-NEXT:    movl %edi, %eax
86; X64-NEXT:    retq
87  %mask = lshr i8 -1, %numhighbits
88  %masked = and i8 %val, %mask ; swapped order
89  ret i8 %masked
90}
91
92; ---------------------------------------------------------------------------- ;
93; 16-bit
94; ---------------------------------------------------------------------------- ;
95
; Pattern c on i16, baseline form: mask is `lshr i16 -1, %numhighbits`, used as
; the first operand of the `and` with %val.
; Expected code shifts in 32-bit registers: shift left, movzwl to drop anything
; above bit 15, then shift right by the same count. Without BMI2 this is
; shll/shrl through %cl; with BMI2 it is shlxl/shrxl.
96define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
97; X86-NOBMI2-LABEL: clear_highbits16_c0:
98; X86-NOBMI2:       # %bb.0:
99; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
100; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
101; X86-NOBMI2-NEXT:    shll %cl, %eax
102; X86-NOBMI2-NEXT:    movzwl %ax, %eax
103; X86-NOBMI2-NEXT:    shrl %cl, %eax
104; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
105; X86-NOBMI2-NEXT:    retl
106;
107; X86-BMI2-LABEL: clear_highbits16_c0:
108; X86-BMI2:       # %bb.0:
109; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
110; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
111; X86-BMI2-NEXT:    movzwl %cx, %ecx
112; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
113; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
114; X86-BMI2-NEXT:    retl
115;
116; X64-NOBMI2-LABEL: clear_highbits16_c0:
117; X64-NOBMI2:       # %bb.0:
118; X64-NOBMI2-NEXT:    movl %esi, %ecx
119; X64-NOBMI2-NEXT:    shll %cl, %edi
120; X64-NOBMI2-NEXT:    movzwl %di, %eax
121; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
122; X64-NOBMI2-NEXT:    shrl %cl, %eax
123; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
124; X64-NOBMI2-NEXT:    retq
125;
126; X64-BMI2-LABEL: clear_highbits16_c0:
127; X64-BMI2:       # %bb.0:
128; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
129; X64-BMI2-NEXT:    movzwl %ax, %eax
130; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
131; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
132; X64-BMI2-NEXT:    retq
133  %mask = lshr i16 -1, %numhighbits
134  %masked = and i16 %mask, %val
135  ret i16 %masked
136}
137
; Pattern c on i16 where the shift amount arrives as an i8 and is zero-extended
; to i16 before building the mask.
; The expected code matches the baseline i16 test: a 32-bit shift-left,
; movzwl, and shift-right by the same count (shlxl/shrxl with BMI2).
138define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
139; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
140; X86-NOBMI2:       # %bb.0:
141; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
142; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
143; X86-NOBMI2-NEXT:    shll %cl, %eax
144; X86-NOBMI2-NEXT:    movzwl %ax, %eax
145; X86-NOBMI2-NEXT:    shrl %cl, %eax
146; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
147; X86-NOBMI2-NEXT:    retl
148;
149; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
150; X86-BMI2:       # %bb.0:
151; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
152; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
153; X86-BMI2-NEXT:    movzwl %cx, %ecx
154; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
155; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
156; X86-BMI2-NEXT:    retl
157;
158; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
159; X64-NOBMI2:       # %bb.0:
160; X64-NOBMI2-NEXT:    movl %esi, %ecx
161; X64-NOBMI2-NEXT:    shll %cl, %edi
162; X64-NOBMI2-NEXT:    movzwl %di, %eax
163; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
164; X64-NOBMI2-NEXT:    shrl %cl, %eax
165; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
166; X64-NOBMI2-NEXT:    retq
167;
168; X64-BMI2-LABEL: clear_highbits16_c1_indexzext:
169; X64-BMI2:       # %bb.0:
170; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
171; X64-BMI2-NEXT:    movzwl %ax, %eax
172; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
173; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
174; X64-BMI2-NEXT:    retq
175  %sh_prom = zext i8 %numhighbits to i16
176  %mask = lshr i16 -1, %sh_prom
177  %masked = and i16 %mask, %val
178  ret i16 %masked
179}
180
; Pattern c on i16 where the value being masked is loaded from %w.
; Expected code loads with movzwl and then performs the 32-bit shift-left,
; movzwl, shift-right sequence (shlxl/shrxl with BMI2). In the BMI2
; expectations the load is a separate movzwl rather than a memory operand of
; shlxl.
181define i16 @clear_highbits16_c2_load(i16* %w, i16 %numhighbits) nounwind {
182; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
183; X86-NOBMI2:       # %bb.0:
184; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
185; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
186; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
187; X86-NOBMI2-NEXT:    shll %cl, %eax
188; X86-NOBMI2-NEXT:    movzwl %ax, %eax
189; X86-NOBMI2-NEXT:    shrl %cl, %eax
190; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
191; X86-NOBMI2-NEXT:    retl
192;
193; X86-BMI2-LABEL: clear_highbits16_c2_load:
194; X86-BMI2:       # %bb.0:
195; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
196; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
197; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
198; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
199; X86-BMI2-NEXT:    movzwl %cx, %ecx
200; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
201; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
202; X86-BMI2-NEXT:    retl
203;
204; X64-NOBMI2-LABEL: clear_highbits16_c2_load:
205; X64-NOBMI2:       # %bb.0:
206; X64-NOBMI2-NEXT:    movl %esi, %ecx
207; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
208; X64-NOBMI2-NEXT:    shll %cl, %eax
209; X64-NOBMI2-NEXT:    movzwl %ax, %eax
210; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
211; X64-NOBMI2-NEXT:    shrl %cl, %eax
212; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
213; X64-NOBMI2-NEXT:    retq
214;
215; X64-BMI2-LABEL: clear_highbits16_c2_load:
216; X64-BMI2:       # %bb.0:
217; X64-BMI2-NEXT:    movzwl (%rdi), %eax
218; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
219; X64-BMI2-NEXT:    movzwl %ax, %eax
220; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
221; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
222; X64-BMI2-NEXT:    retq
223  %val = load i16, i16* %w
224  %mask = lshr i16 -1, %numhighbits
225  %masked = and i16 %mask, %val
226  ret i16 %masked
227}
228
; Pattern c on i16 combining both variations: the value is loaded from %w and
; the i8 shift amount is zero-extended to i16 before building the mask.
; The expected code is the same as for the plain i16 load test.
229define i16 @clear_highbits16_c3_load_indexzext(i16* %w, i8 %numhighbits) nounwind {
230; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
231; X86-NOBMI2:       # %bb.0:
232; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
233; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
234; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
235; X86-NOBMI2-NEXT:    shll %cl, %eax
236; X86-NOBMI2-NEXT:    movzwl %ax, %eax
237; X86-NOBMI2-NEXT:    shrl %cl, %eax
238; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
239; X86-NOBMI2-NEXT:    retl
240;
241; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
242; X86-BMI2:       # %bb.0:
243; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
244; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
245; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
246; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
247; X86-BMI2-NEXT:    movzwl %cx, %ecx
248; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
249; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
250; X86-BMI2-NEXT:    retl
251;
252; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
253; X64-NOBMI2:       # %bb.0:
254; X64-NOBMI2-NEXT:    movl %esi, %ecx
255; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
256; X64-NOBMI2-NEXT:    shll %cl, %eax
257; X64-NOBMI2-NEXT:    movzwl %ax, %eax
258; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
259; X64-NOBMI2-NEXT:    shrl %cl, %eax
260; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
261; X64-NOBMI2-NEXT:    retq
262;
263; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
264; X64-BMI2:       # %bb.0:
265; X64-BMI2-NEXT:    movzwl (%rdi), %eax
266; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
267; X64-BMI2-NEXT:    movzwl %ax, %eax
268; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
269; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
270; X64-BMI2-NEXT:    retq
271  %val = load i16, i16* %w
272  %sh_prom = zext i8 %numhighbits to i16
273  %mask = lshr i16 -1, %sh_prom
274  %masked = and i16 %mask, %val
275  ret i16 %masked
276}
277
; Pattern c on i16 with the `and` operands swapped (%val first, mask second).
; The expected code is the same as for the baseline i16 test, so operand order
; of the `and` is not expected to affect the result.
278define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
279; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
280; X86-NOBMI2:       # %bb.0:
281; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
282; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
283; X86-NOBMI2-NEXT:    shll %cl, %eax
284; X86-NOBMI2-NEXT:    movzwl %ax, %eax
285; X86-NOBMI2-NEXT:    shrl %cl, %eax
286; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
287; X86-NOBMI2-NEXT:    retl
288;
289; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
290; X86-BMI2:       # %bb.0:
291; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
292; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
293; X86-BMI2-NEXT:    movzwl %cx, %ecx
294; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
295; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
296; X86-BMI2-NEXT:    retl
297;
298; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative:
299; X64-NOBMI2:       # %bb.0:
300; X64-NOBMI2-NEXT:    movl %esi, %ecx
301; X64-NOBMI2-NEXT:    shll %cl, %edi
302; X64-NOBMI2-NEXT:    movzwl %di, %eax
303; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
304; X64-NOBMI2-NEXT:    shrl %cl, %eax
305; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
306; X64-NOBMI2-NEXT:    retq
307;
308; X64-BMI2-LABEL: clear_highbits16_c4_commutative:
309; X64-BMI2:       # %bb.0:
310; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
311; X64-BMI2-NEXT:    movzwl %ax, %eax
312; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
313; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
314; X64-BMI2-NEXT:    retq
315  %mask = lshr i16 -1, %numhighbits
316  %masked = and i16 %val, %mask ; swapped order
317  ret i16 %masked
318}
319
320; ---------------------------------------------------------------------------- ;
321; 32-bit
322; ---------------------------------------------------------------------------- ;
323
; Pattern c on i32, baseline form: mask is `lshr i32 -1, %numhighbits`, used as
; the first operand of the `and` with %val.
; Expected code is a shift-left followed by a shift-right by the same count:
; shll/shrl through %cl without BMI2, shlxl/shrxl with BMI2.
324define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
325; X86-NOBMI2-LABEL: clear_highbits32_c0:
326; X86-NOBMI2:       # %bb.0:
327; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
328; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
329; X86-NOBMI2-NEXT:    shll %cl, %eax
330; X86-NOBMI2-NEXT:    shrl %cl, %eax
331; X86-NOBMI2-NEXT:    retl
332;
333; X86-BMI2-LABEL: clear_highbits32_c0:
334; X86-BMI2:       # %bb.0:
335; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
336; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
337; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
338; X86-BMI2-NEXT:    retl
339;
340; X64-NOBMI2-LABEL: clear_highbits32_c0:
341; X64-NOBMI2:       # %bb.0:
342; X64-NOBMI2-NEXT:    movl %esi, %ecx
343; X64-NOBMI2-NEXT:    shll %cl, %edi
344; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
345; X64-NOBMI2-NEXT:    shrl %cl, %edi
346; X64-NOBMI2-NEXT:    movl %edi, %eax
347; X64-NOBMI2-NEXT:    retq
348;
349; X64-BMI2-LABEL: clear_highbits32_c0:
350; X64-BMI2:       # %bb.0:
351; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
352; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
353; X64-BMI2-NEXT:    retq
354  %mask = lshr i32 -1, %numhighbits
355  %masked = and i32 %mask, %val
356  ret i32 %masked
357}
358
; Pattern c on i32 where the shift amount arrives as an i8 and is zero-extended
; to i32 before building the mask.
; The expected code is the same shift pair as in the baseline i32 test; no
; separate extension instruction appears in the expectations.
359define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
360; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
361; X86-NOBMI2:       # %bb.0:
362; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
363; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
364; X86-NOBMI2-NEXT:    shll %cl, %eax
365; X86-NOBMI2-NEXT:    shrl %cl, %eax
366; X86-NOBMI2-NEXT:    retl
367;
368; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
369; X86-BMI2:       # %bb.0:
370; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
371; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
372; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
373; X86-BMI2-NEXT:    retl
374;
375; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
376; X64-NOBMI2:       # %bb.0:
377; X64-NOBMI2-NEXT:    movl %esi, %ecx
378; X64-NOBMI2-NEXT:    shll %cl, %edi
379; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
380; X64-NOBMI2-NEXT:    shrl %cl, %edi
381; X64-NOBMI2-NEXT:    movl %edi, %eax
382; X64-NOBMI2-NEXT:    retq
383;
384; X64-BMI2-LABEL: clear_highbits32_c1_indexzext:
385; X64-BMI2:       # %bb.0:
386; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
387; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
388; X64-BMI2-NEXT:    retq
389  %sh_prom = zext i8 %numhighbits to i32
390  %mask = lshr i32 -1, %sh_prom
391  %masked = and i32 %mask, %val
392  ret i32 %masked
393}
394
; Pattern c on i32 where the value being masked is loaded from %w.
; Without BMI2 the expected code loads into %eax and then does shll/shrl.
; With BMI2 the load is expected to be folded into shlxl as a memory operand,
; followed by shrxl.
395define i32 @clear_highbits32_c2_load(i32* %w, i32 %numhighbits) nounwind {
396; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
397; X86-NOBMI2:       # %bb.0:
398; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
399; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
400; X86-NOBMI2-NEXT:    movl (%eax), %eax
401; X86-NOBMI2-NEXT:    shll %cl, %eax
402; X86-NOBMI2-NEXT:    shrl %cl, %eax
403; X86-NOBMI2-NEXT:    retl
404;
405; X86-BMI2-LABEL: clear_highbits32_c2_load:
406; X86-BMI2:       # %bb.0:
407; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
408; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
409; X86-BMI2-NEXT:    shlxl %ecx, (%eax), %eax
410; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
411; X86-BMI2-NEXT:    retl
412;
413; X64-NOBMI2-LABEL: clear_highbits32_c2_load:
414; X64-NOBMI2:       # %bb.0:
415; X64-NOBMI2-NEXT:    movl %esi, %ecx
416; X64-NOBMI2-NEXT:    movl (%rdi), %eax
417; X64-NOBMI2-NEXT:    shll %cl, %eax
418; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
419; X64-NOBMI2-NEXT:    shrl %cl, %eax
420; X64-NOBMI2-NEXT:    retq
421;
422; X64-BMI2-LABEL: clear_highbits32_c2_load:
423; X64-BMI2:       # %bb.0:
424; X64-BMI2-NEXT:    shlxl %esi, (%rdi), %eax
425; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
426; X64-BMI2-NEXT:    retq
427  %val = load i32, i32* %w
428  %mask = lshr i32 -1, %numhighbits
429  %masked = and i32 %mask, %val
430  ret i32 %masked
431}
432
; Pattern c on i32 combining both variations: the value is loaded from %w and
; the i8 shift amount is zero-extended to i32 before building the mask.
; The expected code is the same as for the plain i32 load test, including the
; load folded into shlxl when BMI2 is available.
433define i32 @clear_highbits32_c3_load_indexzext(i32* %w, i8 %numhighbits) nounwind {
434; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
435; X86-NOBMI2:       # %bb.0:
436; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
437; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
438; X86-NOBMI2-NEXT:    movl (%eax), %eax
439; X86-NOBMI2-NEXT:    shll %cl, %eax
440; X86-NOBMI2-NEXT:    shrl %cl, %eax
441; X86-NOBMI2-NEXT:    retl
442;
443; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
444; X86-BMI2:       # %bb.0:
445; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
446; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
447; X86-BMI2-NEXT:    shlxl %ecx, (%eax), %eax
448; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
449; X86-BMI2-NEXT:    retl
450;
451; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
452; X64-NOBMI2:       # %bb.0:
453; X64-NOBMI2-NEXT:    movl %esi, %ecx
454; X64-NOBMI2-NEXT:    movl (%rdi), %eax
455; X64-NOBMI2-NEXT:    shll %cl, %eax
456; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
457; X64-NOBMI2-NEXT:    shrl %cl, %eax
458; X64-NOBMI2-NEXT:    retq
459;
460; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
461; X64-BMI2:       # %bb.0:
462; X64-BMI2-NEXT:    shlxl %esi, (%rdi), %eax
463; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
464; X64-BMI2-NEXT:    retq
465  %val = load i32, i32* %w
466  %sh_prom = zext i8 %numhighbits to i32
467  %mask = lshr i32 -1, %sh_prom
468  %masked = and i32 %mask, %val
469  ret i32 %masked
470}
471
; Pattern c on i32 with the `and` operands swapped (%val first, mask second).
; The expected code is the same shift pair as in the baseline i32 test, so
; operand order of the `and` is not expected to affect the result.
472define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
473; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
474; X86-NOBMI2:       # %bb.0:
475; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
476; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
477; X86-NOBMI2-NEXT:    shll %cl, %eax
478; X86-NOBMI2-NEXT:    shrl %cl, %eax
479; X86-NOBMI2-NEXT:    retl
480;
481; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
482; X86-BMI2:       # %bb.0:
483; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
484; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
485; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
486; X86-BMI2-NEXT:    retl
487;
488; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative:
489; X64-NOBMI2:       # %bb.0:
490; X64-NOBMI2-NEXT:    movl %esi, %ecx
491; X64-NOBMI2-NEXT:    shll %cl, %edi
492; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
493; X64-NOBMI2-NEXT:    shrl %cl, %edi
494; X64-NOBMI2-NEXT:    movl %edi, %eax
495; X64-NOBMI2-NEXT:    retq
496;
497; X64-BMI2-LABEL: clear_highbits32_c4_commutative:
498; X64-BMI2:       # %bb.0:
499; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
500; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
501; X64-BMI2-NEXT:    retq
502  %mask = lshr i32 -1, %numhighbits
503  %masked = and i32 %val, %mask ; swapped order
504  ret i32 %masked
505}
506
507; ---------------------------------------------------------------------------- ;
508; 64-bit
509; ---------------------------------------------------------------------------- ;
510
; Pattern c on i64, baseline form: mask is `lshr i64 -1, %numhighbits`, used as
; the first operand of the `and` with %val.
; On i686 the expected code keeps the mask form: it builds the 64-bit mask in
; %edx:%eax (shrl or shrxl for the high half, shrdl for the low half, then a
; branch on bit 5 of the count that moves the high half down and zeroes it)
; and ANDs each half with the argument on the stack.
; On x86_64 the expected code is a shift pair: shlq/shrq without BMI2,
; shlxq/shrxq with BMI2.
511define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
512; X86-NOBMI2-LABEL: clear_highbits64_c0:
513; X86-NOBMI2:       # %bb.0:
514; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
515; X86-NOBMI2-NEXT:    movl $-1, %eax
516; X86-NOBMI2-NEXT:    movl $-1, %edx
517; X86-NOBMI2-NEXT:    shrl %cl, %edx
518; X86-NOBMI2-NEXT:    shrdl %cl, %eax, %eax
519; X86-NOBMI2-NEXT:    testb $32, %cl
520; X86-NOBMI2-NEXT:    je .LBB13_2
521; X86-NOBMI2-NEXT:  # %bb.1:
522; X86-NOBMI2-NEXT:    movl %edx, %eax
523; X86-NOBMI2-NEXT:    xorl %edx, %edx
524; X86-NOBMI2-NEXT:  .LBB13_2:
525; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
526; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
527; X86-NOBMI2-NEXT:    retl
528;
529; X86-BMI2-LABEL: clear_highbits64_c0:
530; X86-BMI2:       # %bb.0:
531; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
532; X86-BMI2-NEXT:    movl $-1, %eax
533; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
534; X86-BMI2-NEXT:    shrdl %cl, %eax, %eax
535; X86-BMI2-NEXT:    testb $32, %cl
536; X86-BMI2-NEXT:    je .LBB13_2
537; X86-BMI2-NEXT:  # %bb.1:
538; X86-BMI2-NEXT:    movl %edx, %eax
539; X86-BMI2-NEXT:    xorl %edx, %edx
540; X86-BMI2-NEXT:  .LBB13_2:
541; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
542; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
543; X86-BMI2-NEXT:    retl
544;
545; X64-NOBMI2-LABEL: clear_highbits64_c0:
546; X64-NOBMI2:       # %bb.0:
547; X64-NOBMI2-NEXT:    movq %rsi, %rcx
548; X64-NOBMI2-NEXT:    shlq %cl, %rdi
549; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
550; X64-NOBMI2-NEXT:    shrq %cl, %rdi
551; X64-NOBMI2-NEXT:    movq %rdi, %rax
552; X64-NOBMI2-NEXT:    retq
553;
554; X64-BMI2-LABEL: clear_highbits64_c0:
555; X64-BMI2:       # %bb.0:
556; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
557; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
558; X64-BMI2-NEXT:    retq
559  %mask = lshr i64 -1, %numhighbits
560  %masked = and i64 %mask, %val
561  ret i64 %masked
562}
563
; Pattern c on i64 where the shift amount arrives as an i8 and is zero-extended
; to i64 before building the mask.
; On i686 the expected code is the same mask-and-AND sequence as in the
; baseline i64 test. On x86_64 it is a shift pair; in the BMI2 expectation the
; only extra line is a register-liveness `kill` note widening %esi to %rsi
; before shlxq/shrxq.
564define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
565; X86-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
566; X86-NOBMI2:       # %bb.0:
567; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
568; X86-NOBMI2-NEXT:    movl $-1, %eax
569; X86-NOBMI2-NEXT:    movl $-1, %edx
570; X86-NOBMI2-NEXT:    shrl %cl, %edx
571; X86-NOBMI2-NEXT:    shrdl %cl, %eax, %eax
572; X86-NOBMI2-NEXT:    testb $32, %cl
573; X86-NOBMI2-NEXT:    je .LBB14_2
574; X86-NOBMI2-NEXT:  # %bb.1:
575; X86-NOBMI2-NEXT:    movl %edx, %eax
576; X86-NOBMI2-NEXT:    xorl %edx, %edx
577; X86-NOBMI2-NEXT:  .LBB14_2:
578; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
579; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
580; X86-NOBMI2-NEXT:    retl
581;
582; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
583; X86-BMI2:       # %bb.0:
584; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
585; X86-BMI2-NEXT:    movl $-1, %eax
586; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
587; X86-BMI2-NEXT:    shrdl %cl, %eax, %eax
588; X86-BMI2-NEXT:    testb $32, %cl
589; X86-BMI2-NEXT:    je .LBB14_2
590; X86-BMI2-NEXT:  # %bb.1:
591; X86-BMI2-NEXT:    movl %edx, %eax
592; X86-BMI2-NEXT:    xorl %edx, %edx
593; X86-BMI2-NEXT:  .LBB14_2:
594; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
595; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
596; X86-BMI2-NEXT:    retl
597;
598; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
599; X64-NOBMI2:       # %bb.0:
600; X64-NOBMI2-NEXT:    movl %esi, %ecx
601; X64-NOBMI2-NEXT:    shlq %cl, %rdi
602; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
603; X64-NOBMI2-NEXT:    shrq %cl, %rdi
604; X64-NOBMI2-NEXT:    movq %rdi, %rax
605; X64-NOBMI2-NEXT:    retq
606;
607; X64-BMI2-LABEL: clear_highbits64_c1_indexzext:
608; X64-BMI2:       # %bb.0:
609; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
610; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
611; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
612; X64-BMI2-NEXT:    retq
613  %sh_prom = zext i8 %numhighbits to i64
614  %mask = lshr i64 -1, %sh_prom
615  %masked = and i64 %mask, %val
616  ret i64 %masked
617}
618
; Pattern c on i64 where the value being masked is loaded from %w.
; On i686 the expected code builds the 64-bit mask in %edx:%eax and ANDs the
; halves directly with memory at (%esi) and 4(%esi); %esi is saved and restored
; around its use as the pointer register.
; On x86_64 the expected code is a shift pair, with the load folded into shlxq
; when BMI2 is available.
619define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
620; X86-NOBMI2-LABEL: clear_highbits64_c2_load:
621; X86-NOBMI2:       # %bb.0:
622; X86-NOBMI2-NEXT:    pushl %esi
623; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
624; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
625; X86-NOBMI2-NEXT:    movl $-1, %eax
626; X86-NOBMI2-NEXT:    movl $-1, %edx
627; X86-NOBMI2-NEXT:    shrl %cl, %edx
628; X86-NOBMI2-NEXT:    shrdl %cl, %eax, %eax
629; X86-NOBMI2-NEXT:    testb $32, %cl
630; X86-NOBMI2-NEXT:    je .LBB15_2
631; X86-NOBMI2-NEXT:  # %bb.1:
632; X86-NOBMI2-NEXT:    movl %edx, %eax
633; X86-NOBMI2-NEXT:    xorl %edx, %edx
634; X86-NOBMI2-NEXT:  .LBB15_2:
635; X86-NOBMI2-NEXT:    andl (%esi), %eax
636; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
637; X86-NOBMI2-NEXT:    popl %esi
638; X86-NOBMI2-NEXT:    retl
639;
640; X86-BMI2-LABEL: clear_highbits64_c2_load:
641; X86-BMI2:       # %bb.0:
642; X86-BMI2-NEXT:    pushl %esi
643; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
644; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
645; X86-BMI2-NEXT:    movl $-1, %eax
646; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
647; X86-BMI2-NEXT:    shrdl %cl, %eax, %eax
648; X86-BMI2-NEXT:    testb $32, %cl
649; X86-BMI2-NEXT:    je .LBB15_2
650; X86-BMI2-NEXT:  # %bb.1:
651; X86-BMI2-NEXT:    movl %edx, %eax
652; X86-BMI2-NEXT:    xorl %edx, %edx
653; X86-BMI2-NEXT:  .LBB15_2:
654; X86-BMI2-NEXT:    andl (%esi), %eax
655; X86-BMI2-NEXT:    andl 4(%esi), %edx
656; X86-BMI2-NEXT:    popl %esi
657; X86-BMI2-NEXT:    retl
658;
659; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
660; X64-NOBMI2:       # %bb.0:
661; X64-NOBMI2-NEXT:    movq %rsi, %rcx
662; X64-NOBMI2-NEXT:    movq (%rdi), %rax
663; X64-NOBMI2-NEXT:    shlq %cl, %rax
664; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
665; X64-NOBMI2-NEXT:    shrq %cl, %rax
666; X64-NOBMI2-NEXT:    retq
667;
668; X64-BMI2-LABEL: clear_highbits64_c2_load:
669; X64-BMI2:       # %bb.0:
670; X64-BMI2-NEXT:    shlxq %rsi, (%rdi), %rax
671; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
672; X64-BMI2-NEXT:    retq
673  %val = load i64, i64* %w
674  %mask = lshr i64 -1, %numhighbits
675  %masked = and i64 %mask, %val
676  ret i64 %masked
677}
678
; Pattern c on i64 combining both variations: the value is loaded from %w and
; the i8 shift amount is zero-extended to i64 before building the mask.
; On i686 the expected code is the same mask-and-AND-with-memory sequence as in
; the plain i64 load test. On x86_64 it is a shift pair, with the load folded
; into shlxq when BMI2 is available.
679define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind {
680; X86-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
681; X86-NOBMI2:       # %bb.0:
682; X86-NOBMI2-NEXT:    pushl %esi
683; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
684; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
685; X86-NOBMI2-NEXT:    movl $-1, %eax
686; X86-NOBMI2-NEXT:    movl $-1, %edx
687; X86-NOBMI2-NEXT:    shrl %cl, %edx
688; X86-NOBMI2-NEXT:    shrdl %cl, %eax, %eax
689; X86-NOBMI2-NEXT:    testb $32, %cl
690; X86-NOBMI2-NEXT:    je .LBB16_2
691; X86-NOBMI2-NEXT:  # %bb.1:
692; X86-NOBMI2-NEXT:    movl %edx, %eax
693; X86-NOBMI2-NEXT:    xorl %edx, %edx
694; X86-NOBMI2-NEXT:  .LBB16_2:
695; X86-NOBMI2-NEXT:    andl (%esi), %eax
696; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
697; X86-NOBMI2-NEXT:    popl %esi
698; X86-NOBMI2-NEXT:    retl
699;
700; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
701; X86-BMI2:       # %bb.0:
702; X86-BMI2-NEXT:    pushl %esi
703; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
704; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
705; X86-BMI2-NEXT:    movl $-1, %eax
706; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
707; X86-BMI2-NEXT:    shrdl %cl, %eax, %eax
708; X86-BMI2-NEXT:    testb $32, %cl
709; X86-BMI2-NEXT:    je .LBB16_2
710; X86-BMI2-NEXT:  # %bb.1:
711; X86-BMI2-NEXT:    movl %edx, %eax
712; X86-BMI2-NEXT:    xorl %edx, %edx
713; X86-BMI2-NEXT:  .LBB16_2:
714; X86-BMI2-NEXT:    andl (%esi), %eax
715; X86-BMI2-NEXT:    andl 4(%esi), %edx
716; X86-BMI2-NEXT:    popl %esi
717; X86-BMI2-NEXT:    retl
718;
719; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
720; X64-NOBMI2:       # %bb.0:
721; X64-NOBMI2-NEXT:    movl %esi, %ecx
722; X64-NOBMI2-NEXT:    movq (%rdi), %rax
723; X64-NOBMI2-NEXT:    shlq %cl, %rax
724; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
725; X64-NOBMI2-NEXT:    shrq %cl, %rax
726; X64-NOBMI2-NEXT:    retq
727;
728; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
729; X64-BMI2:       # %bb.0:
730; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
731; X64-BMI2-NEXT:    shlxq %rsi, (%rdi), %rax
732; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
733; X64-BMI2-NEXT:    retq
734  %val = load i64, i64* %w
735  %sh_prom = zext i8 %numhighbits to i64
736  %mask = lshr i64 -1, %sh_prom
737  %masked = and i64 %mask, %val
738  ret i64 %masked
739}
740
; Pattern c on i64 with the `and` operands swapped (%val first, mask second).
; The expected code is the same as for the baseline i64 test on every
; configuration: mask-and-AND on i686, a shift pair on x86_64.
741define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
742; X86-NOBMI2-LABEL: clear_highbits64_c4_commutative:
743; X86-NOBMI2:       # %bb.0:
744; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
745; X86-NOBMI2-NEXT:    movl $-1, %eax
746; X86-NOBMI2-NEXT:    movl $-1, %edx
747; X86-NOBMI2-NEXT:    shrl %cl, %edx
748; X86-NOBMI2-NEXT:    shrdl %cl, %eax, %eax
749; X86-NOBMI2-NEXT:    testb $32, %cl
750; X86-NOBMI2-NEXT:    je .LBB17_2
751; X86-NOBMI2-NEXT:  # %bb.1:
752; X86-NOBMI2-NEXT:    movl %edx, %eax
753; X86-NOBMI2-NEXT:    xorl %edx, %edx
754; X86-NOBMI2-NEXT:  .LBB17_2:
755; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
756; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
757; X86-NOBMI2-NEXT:    retl
758;
759; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
760; X86-BMI2:       # %bb.0:
761; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
762; X86-BMI2-NEXT:    movl $-1, %eax
763; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
764; X86-BMI2-NEXT:    shrdl %cl, %eax, %eax
765; X86-BMI2-NEXT:    testb $32, %cl
766; X86-BMI2-NEXT:    je .LBB17_2
767; X86-BMI2-NEXT:  # %bb.1:
768; X86-BMI2-NEXT:    movl %edx, %eax
769; X86-BMI2-NEXT:    xorl %edx, %edx
770; X86-BMI2-NEXT:  .LBB17_2:
771; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
772; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
773; X86-BMI2-NEXT:    retl
774;
775; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
776; X64-NOBMI2:       # %bb.0:
777; X64-NOBMI2-NEXT:    movq %rsi, %rcx
778; X64-NOBMI2-NEXT:    shlq %cl, %rdi
779; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
780; X64-NOBMI2-NEXT:    shrq %cl, %rdi
781; X64-NOBMI2-NEXT:    movq %rdi, %rax
782; X64-NOBMI2-NEXT:    retq
783;
784; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
785; X64-BMI2:       # %bb.0:
786; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
787; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
788; X64-BMI2-NEXT:    retq
789  %mask = lshr i64 -1, %numhighbits
790  %masked = and i64 %val, %mask ; swapped order
791  ret i64 %masked
792}
793
794; ---------------------------------------------------------------------------- ;
795; Multi-use tests
796; ---------------------------------------------------------------------------- ;
797
; External functions with no body in this file. @use32 is called by the
; multi-use test below with the mask as its argument, which gives the mask a
; second user besides the `and`.
; NOTE(review): @use64 is presumably used the same way by a 64-bit multi-use
; test further down the file - confirm.
798declare void @use32(i32)
799declare void @use64(i64)
800
; 32-bit multi-use case: the IR builds %mask = (-1 >> %numhighbits), passes
; %mask to @use32, and returns %mask & %val.
; NOTE(review): despite the "oneuse" name, %mask has two users in this body
; (the call and the 'and').
; In all four assertion groups below, the expected code materializes the mask
; by shifting $-1 right (shrl or shrxl), keeps it live across the call to
; use32, and then applies it with a plain 'and'. None of the groups uses a
; shift-left/shift-right pair on %val.
801define i32 @oneuse32(i32 %val, i32 %numhighbits) nounwind {
802; X86-NOBMI2-LABEL: oneuse32:
803; X86-NOBMI2:       # %bb.0:
804; X86-NOBMI2-NEXT:    pushl %esi
805; X86-NOBMI2-NEXT:    subl $8, %esp
806; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
807; X86-NOBMI2-NEXT:    movl $-1, %esi
808; X86-NOBMI2-NEXT:    shrl %cl, %esi
809; X86-NOBMI2-NEXT:    movl %esi, (%esp)
810; X86-NOBMI2-NEXT:    calll use32
811; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
812; X86-NOBMI2-NEXT:    movl %esi, %eax
813; X86-NOBMI2-NEXT:    addl $8, %esp
814; X86-NOBMI2-NEXT:    popl %esi
815; X86-NOBMI2-NEXT:    retl
816;
817; X86-BMI2-LABEL: oneuse32:
818; X86-BMI2:       # %bb.0:
819; X86-BMI2-NEXT:    pushl %esi
820; X86-BMI2-NEXT:    subl $8, %esp
821; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
822; X86-BMI2-NEXT:    movl $-1, %ecx
823; X86-BMI2-NEXT:    shrxl %eax, %ecx, %esi
824; X86-BMI2-NEXT:    movl %esi, (%esp)
825; X86-BMI2-NEXT:    calll use32
826; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
827; X86-BMI2-NEXT:    movl %esi, %eax
828; X86-BMI2-NEXT:    addl $8, %esp
829; X86-BMI2-NEXT:    popl %esi
830; X86-BMI2-NEXT:    retl
831;
832; X64-NOBMI2-LABEL: oneuse32:
833; X64-NOBMI2:       # %bb.0:
834; X64-NOBMI2-NEXT:    pushq %rbp
835; X64-NOBMI2-NEXT:    pushq %rbx
836; X64-NOBMI2-NEXT:    pushq %rax
837; X64-NOBMI2-NEXT:    movl %edi, %ebx
838; X64-NOBMI2-NEXT:    movl $-1, %ebp
839; X64-NOBMI2-NEXT:    movl %esi, %ecx
840; X64-NOBMI2-NEXT:    shrl %cl, %ebp
841; X64-NOBMI2-NEXT:    movl %ebp, %edi
842; X64-NOBMI2-NEXT:    callq use32
843; X64-NOBMI2-NEXT:    andl %ebx, %ebp
844; X64-NOBMI2-NEXT:    movl %ebp, %eax
845; X64-NOBMI2-NEXT:    addq $8, %rsp
846; X64-NOBMI2-NEXT:    popq %rbx
847; X64-NOBMI2-NEXT:    popq %rbp
848; X64-NOBMI2-NEXT:    retq
849;
850; X64-BMI2-LABEL: oneuse32:
851; X64-BMI2:       # %bb.0:
852; X64-BMI2-NEXT:    pushq %rbp
853; X64-BMI2-NEXT:    pushq %rbx
854; X64-BMI2-NEXT:    pushq %rax
855; X64-BMI2-NEXT:    movl %edi, %ebx
856; X64-BMI2-NEXT:    movl $-1, %eax
857; X64-BMI2-NEXT:    shrxl %esi, %eax, %ebp
858; X64-BMI2-NEXT:    movl %ebp, %edi
859; X64-BMI2-NEXT:    callq use32
860; X64-BMI2-NEXT:    andl %ebx, %ebp
861; X64-BMI2-NEXT:    movl %ebp, %eax
862; X64-BMI2-NEXT:    addq $8, %rsp
863; X64-BMI2-NEXT:    popq %rbx
864; X64-BMI2-NEXT:    popq %rbp
865; X64-BMI2-NEXT:    retq
; Test input: the mask feeds both the call and the 'and'.
866  %mask = lshr i32 -1, %numhighbits
867  call void @use32(i32 %mask)
868  %masked = and i32 %mask, %val
869  ret i32 %masked
870}
871
; 64-bit multi-use case: the IR builds %mask = (-1 >> %numhighbits), passes
; %mask to @use64, and returns %mask & %val.
; NOTE(review): despite the "oneuse" name, %mask has two users in this body
; (the call and the 'and').
; In the two x86_64 assertion groups the mask is a single 64-bit shift of $-1
; (shrq or shrxq) that stays live across the call and is then and-ed with the
; saved %val.
; In the two i686 groups the 64-bit shift is expanded into a 32-bit shift
; (shrl or shrxl) for the high half and a shrdl for the low half. A
; 'testb $32, %cl' then checks bit 5 of the shift amount; when it is set, the
; shifted high half is moved into the low half and the high half is zeroed.
; Both halves are pushed for the call to use64 and and-ed with the stack
; copies of %val afterwards.
872define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
873; X86-NOBMI2-LABEL: oneuse64:
874; X86-NOBMI2:       # %bb.0:
875; X86-NOBMI2-NEXT:    pushl %edi
876; X86-NOBMI2-NEXT:    pushl %esi
877; X86-NOBMI2-NEXT:    pushl %eax
878; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
879; X86-NOBMI2-NEXT:    movl $-1, %esi
880; X86-NOBMI2-NEXT:    movl $-1, %edi
881; X86-NOBMI2-NEXT:    shrl %cl, %edi
882; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %esi
883; X86-NOBMI2-NEXT:    testb $32, %cl
884; X86-NOBMI2-NEXT:    je .LBB19_2
885; X86-NOBMI2-NEXT:  # %bb.1:
886; X86-NOBMI2-NEXT:    movl %edi, %esi
887; X86-NOBMI2-NEXT:    xorl %edi, %edi
888; X86-NOBMI2-NEXT:  .LBB19_2:
889; X86-NOBMI2-NEXT:    subl $8, %esp
890; X86-NOBMI2-NEXT:    pushl %edi
891; X86-NOBMI2-NEXT:    pushl %esi
892; X86-NOBMI2-NEXT:    calll use64
893; X86-NOBMI2-NEXT:    addl $16, %esp
894; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
895; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
896; X86-NOBMI2-NEXT:    movl %esi, %eax
897; X86-NOBMI2-NEXT:    movl %edi, %edx
898; X86-NOBMI2-NEXT:    addl $4, %esp
899; X86-NOBMI2-NEXT:    popl %esi
900; X86-NOBMI2-NEXT:    popl %edi
901; X86-NOBMI2-NEXT:    retl
902;
903; X86-BMI2-LABEL: oneuse64:
904; X86-BMI2:       # %bb.0:
905; X86-BMI2-NEXT:    pushl %edi
906; X86-BMI2-NEXT:    pushl %esi
907; X86-BMI2-NEXT:    pushl %eax
908; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
909; X86-BMI2-NEXT:    movl $-1, %esi
910; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edi
911; X86-BMI2-NEXT:    shrdl %cl, %esi, %esi
912; X86-BMI2-NEXT:    testb $32, %cl
913; X86-BMI2-NEXT:    je .LBB19_2
914; X86-BMI2-NEXT:  # %bb.1:
915; X86-BMI2-NEXT:    movl %edi, %esi
916; X86-BMI2-NEXT:    xorl %edi, %edi
917; X86-BMI2-NEXT:  .LBB19_2:
918; X86-BMI2-NEXT:    subl $8, %esp
919; X86-BMI2-NEXT:    pushl %edi
920; X86-BMI2-NEXT:    pushl %esi
921; X86-BMI2-NEXT:    calll use64
922; X86-BMI2-NEXT:    addl $16, %esp
923; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
924; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
925; X86-BMI2-NEXT:    movl %esi, %eax
926; X86-BMI2-NEXT:    movl %edi, %edx
927; X86-BMI2-NEXT:    addl $4, %esp
928; X86-BMI2-NEXT:    popl %esi
929; X86-BMI2-NEXT:    popl %edi
930; X86-BMI2-NEXT:    retl
931;
932; X64-NOBMI2-LABEL: oneuse64:
933; X64-NOBMI2:       # %bb.0:
934; X64-NOBMI2-NEXT:    pushq %r14
935; X64-NOBMI2-NEXT:    pushq %rbx
936; X64-NOBMI2-NEXT:    pushq %rax
937; X64-NOBMI2-NEXT:    movq %rdi, %r14
938; X64-NOBMI2-NEXT:    movq $-1, %rbx
939; X64-NOBMI2-NEXT:    movl %esi, %ecx
940; X64-NOBMI2-NEXT:    shrq %cl, %rbx
941; X64-NOBMI2-NEXT:    movq %rbx, %rdi
942; X64-NOBMI2-NEXT:    callq use64
943; X64-NOBMI2-NEXT:    andq %r14, %rbx
944; X64-NOBMI2-NEXT:    movq %rbx, %rax
945; X64-NOBMI2-NEXT:    addq $8, %rsp
946; X64-NOBMI2-NEXT:    popq %rbx
947; X64-NOBMI2-NEXT:    popq %r14
948; X64-NOBMI2-NEXT:    retq
949;
950; X64-BMI2-LABEL: oneuse64:
951; X64-BMI2:       # %bb.0:
952; X64-BMI2-NEXT:    pushq %r14
953; X64-BMI2-NEXT:    pushq %rbx
954; X64-BMI2-NEXT:    pushq %rax
955; X64-BMI2-NEXT:    movq %rdi, %r14
956; X64-BMI2-NEXT:    movq $-1, %rax
957; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rbx
958; X64-BMI2-NEXT:    movq %rbx, %rdi
959; X64-BMI2-NEXT:    callq use64
960; X64-BMI2-NEXT:    andq %r14, %rbx
961; X64-BMI2-NEXT:    movq %rbx, %rax
962; X64-BMI2-NEXT:    addq $8, %rsp
963; X64-BMI2-NEXT:    popq %rbx
964; X64-BMI2-NEXT:    popq %r14
965; X64-BMI2-NEXT:    retq
; Test input: the mask feeds both the call and the 'and'.
966  %mask = lshr i64 -1, %numhighbits
967  call void @use64(i64 %mask)
968  %masked = and i64 %mask, %val
969  ret i64 %masked
970}
971