1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK0,X86-FALLBACK0 3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK1,X86-FALLBACK1 4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK2,X86-FALLBACK2 5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK3,X86-FALLBACK3 6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK4,X86-FALLBACK4 7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK0,X64-FALLBACK0 8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK1,X64-FALLBACK1 9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK2,X64-FALLBACK2 10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK3,X64-FALLBACK3 11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK4,X64-FALLBACK4 12 13; Patterns: 14; c) x & (-1 << y) 15; ic) x & (-1 << (32 - y)) 16; d) x >> y << y 17; id) x >> (32 - y) << (32 - y) 18; Patterns c and d are equivalent, as are ic and id (32 stands for the bit width of x); in each pair we prefer the second, shift-pair variant (d, id) if we have BMI2. 19 20; ---------------------------------------------------------------------------- ; 21; Pattern c. 
22; ---------------------------------------------------------------------------- ; 23 24; 8-bit 25 26define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind { 27; X86-LABEL: clear_lowbits8_c0: 28; X86: # %bb.0: 29; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 30; X86-NEXT: movb {{[0-9]+}}(%esp), %al 31; X86-NEXT: shrb %cl, %al 32; X86-NEXT: shlb %cl, %al 33; X86-NEXT: retl 34; 35; X64-LABEL: clear_lowbits8_c0: 36; X64: # %bb.0: 37; X64-NEXT: movl %esi, %ecx 38; X64-NEXT: shrb %cl, %dil 39; X64-NEXT: # kill: def $cl killed $cl killed $ecx 40; X64-NEXT: shlb %cl, %dil 41; X64-NEXT: movl %edi, %eax 42; X64-NEXT: retq 43 %mask = shl i8 -1, %numlowbits 44 %masked = and i8 %mask, %val 45 ret i8 %masked 46} 47 48define i8 @clear_lowbits8_c2_load(i8* %w, i8 %numlowbits) nounwind { 49; X86-LABEL: clear_lowbits8_c2_load: 50; X86: # %bb.0: 51; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 52; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 53; X86-NEXT: movb (%eax), %al 54; X86-NEXT: shrb %cl, %al 55; X86-NEXT: shlb %cl, %al 56; X86-NEXT: retl 57; 58; X64-LABEL: clear_lowbits8_c2_load: 59; X64: # %bb.0: 60; X64-NEXT: movl %esi, %ecx 61; X64-NEXT: movb (%rdi), %al 62; X64-NEXT: shrb %cl, %al 63; X64-NEXT: # kill: def $cl killed $cl killed $ecx 64; X64-NEXT: shlb %cl, %al 65; X64-NEXT: retq 66 %val = load i8, i8* %w 67 %mask = shl i8 -1, %numlowbits 68 %masked = and i8 %mask, %val 69 ret i8 %masked 70} 71 72define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind { 73; X86-LABEL: clear_lowbits8_c4_commutative: 74; X86: # %bb.0: 75; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 76; X86-NEXT: movb {{[0-9]+}}(%esp), %al 77; X86-NEXT: shrb %cl, %al 78; X86-NEXT: shlb %cl, %al 79; X86-NEXT: retl 80; 81; X64-LABEL: clear_lowbits8_c4_commutative: 82; X64: # %bb.0: 83; X64-NEXT: movl %esi, %ecx 84; X64-NEXT: shrb %cl, %dil 85; X64-NEXT: # kill: def $cl killed $cl killed $ecx 86; X64-NEXT: shlb %cl, %dil 87; X64-NEXT: movl %edi, %eax 88; X64-NEXT: retq 89 %mask = shl i8 -1, %numlowbits 90 %masked 
= and i8 %val, %mask ; swapped order 91 ret i8 %masked 92} 93 94; 16-bit 95 96define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind { 97; X86-NOBMI2-LABEL: clear_lowbits16_c0: 98; X86-NOBMI2: # %bb.0: 99; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 100; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 101; X86-NOBMI2-NEXT: shrl %cl, %eax 102; X86-NOBMI2-NEXT: shll %cl, %eax 103; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 104; X86-NOBMI2-NEXT: retl 105; 106; X86-BMI2-LABEL: clear_lowbits16_c0: 107; X86-BMI2: # %bb.0: 108; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 109; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 110; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 111; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 112; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 113; X86-BMI2-NEXT: retl 114; 115; X64-NOBMI2-LABEL: clear_lowbits16_c0: 116; X64-NOBMI2: # %bb.0: 117; X64-NOBMI2-NEXT: movl %esi, %ecx 118; X64-NOBMI2-NEXT: movzwl %di, %eax 119; X64-NOBMI2-NEXT: shrl %cl, %eax 120; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 121; X64-NOBMI2-NEXT: shll %cl, %eax 122; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 123; X64-NOBMI2-NEXT: retq 124; 125; X64-BMI2-LABEL: clear_lowbits16_c0: 126; X64-BMI2: # %bb.0: 127; X64-BMI2-NEXT: movzwl %di, %eax 128; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 129; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 130; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 131; X64-BMI2-NEXT: retq 132 %mask = shl i16 -1, %numlowbits 133 %masked = and i16 %mask, %val 134 ret i16 %masked 135} 136 137define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind { 138; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext: 139; X86-NOBMI2: # %bb.0: 140; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 141; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 142; X86-NOBMI2-NEXT: shrl %cl, %eax 143; X86-NOBMI2-NEXT: shll %cl, %eax 144; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 145; X86-NOBMI2-NEXT: retl 146; 147; 
X86-BMI2-LABEL: clear_lowbits16_c1_indexzext: 148; X86-BMI2: # %bb.0: 149; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 150; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 151; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 152; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 153; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 154; X86-BMI2-NEXT: retl 155; 156; X64-NOBMI2-LABEL: clear_lowbits16_c1_indexzext: 157; X64-NOBMI2: # %bb.0: 158; X64-NOBMI2-NEXT: movl %esi, %ecx 159; X64-NOBMI2-NEXT: movzwl %di, %eax 160; X64-NOBMI2-NEXT: shrl %cl, %eax 161; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 162; X64-NOBMI2-NEXT: shll %cl, %eax 163; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 164; X64-NOBMI2-NEXT: retq 165; 166; X64-BMI2-LABEL: clear_lowbits16_c1_indexzext: 167; X64-BMI2: # %bb.0: 168; X64-BMI2-NEXT: movzwl %di, %eax 169; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 170; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 171; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 172; X64-BMI2-NEXT: retq 173 %sh_prom = zext i8 %numlowbits to i16 174 %mask = shl i16 -1, %sh_prom 175 %masked = and i16 %mask, %val 176 ret i16 %masked 177} 178 179define i16 @clear_lowbits16_c2_load(i16* %w, i16 %numlowbits) nounwind { 180; X86-NOBMI2-LABEL: clear_lowbits16_c2_load: 181; X86-NOBMI2: # %bb.0: 182; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 183; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 184; X86-NOBMI2-NEXT: movzwl (%eax), %eax 185; X86-NOBMI2-NEXT: shrl %cl, %eax 186; X86-NOBMI2-NEXT: shll %cl, %eax 187; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 188; X86-NOBMI2-NEXT: retl 189; 190; X86-BMI2-LABEL: clear_lowbits16_c2_load: 191; X86-BMI2: # %bb.0: 192; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 193; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 194; X86-BMI2-NEXT: movzwl (%ecx), %ecx 195; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx 196; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 197; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 198; X86-BMI2-NEXT: retl 199; 200; 
X64-NOBMI2-LABEL: clear_lowbits16_c2_load: 201; X64-NOBMI2: # %bb.0: 202; X64-NOBMI2-NEXT: movl %esi, %ecx 203; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 204; X64-NOBMI2-NEXT: shrl %cl, %eax 205; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 206; X64-NOBMI2-NEXT: shll %cl, %eax 207; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 208; X64-NOBMI2-NEXT: retq 209; 210; X64-BMI2-LABEL: clear_lowbits16_c2_load: 211; X64-BMI2: # %bb.0: 212; X64-BMI2-NEXT: movzwl (%rdi), %eax 213; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 214; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 215; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 216; X64-BMI2-NEXT: retq 217 %val = load i16, i16* %w 218 %mask = shl i16 -1, %numlowbits 219 %masked = and i16 %mask, %val 220 ret i16 %masked 221} 222 223define i16 @clear_lowbits16_c3_load_indexzext(i16* %w, i8 %numlowbits) nounwind { 224; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext: 225; X86-NOBMI2: # %bb.0: 226; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 227; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 228; X86-NOBMI2-NEXT: movzwl (%eax), %eax 229; X86-NOBMI2-NEXT: shrl %cl, %eax 230; X86-NOBMI2-NEXT: shll %cl, %eax 231; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 232; X86-NOBMI2-NEXT: retl 233; 234; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext: 235; X86-BMI2: # %bb.0: 236; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 237; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 238; X86-BMI2-NEXT: movzwl (%ecx), %ecx 239; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx 240; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 241; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 242; X86-BMI2-NEXT: retl 243; 244; X64-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext: 245; X64-NOBMI2: # %bb.0: 246; X64-NOBMI2-NEXT: movl %esi, %ecx 247; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 248; X64-NOBMI2-NEXT: shrl %cl, %eax 249; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 250; X64-NOBMI2-NEXT: shll %cl, %eax 251; X64-NOBMI2-NEXT: # kill: def $ax killed $ax 
killed $eax 252; X64-NOBMI2-NEXT: retq 253; 254; X64-BMI2-LABEL: clear_lowbits16_c3_load_indexzext: 255; X64-BMI2: # %bb.0: 256; X64-BMI2-NEXT: movzwl (%rdi), %eax 257; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 258; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 259; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 260; X64-BMI2-NEXT: retq 261 %val = load i16, i16* %w 262 %sh_prom = zext i8 %numlowbits to i16 263 %mask = shl i16 -1, %sh_prom 264 %masked = and i16 %mask, %val 265 ret i16 %masked 266} 267 268define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind { 269; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative: 270; X86-NOBMI2: # %bb.0: 271; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 272; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 273; X86-NOBMI2-NEXT: shrl %cl, %eax 274; X86-NOBMI2-NEXT: shll %cl, %eax 275; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 276; X86-NOBMI2-NEXT: retl 277; 278; X86-BMI2-LABEL: clear_lowbits16_c4_commutative: 279; X86-BMI2: # %bb.0: 280; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax 281; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 282; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 283; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 284; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 285; X86-BMI2-NEXT: retl 286; 287; X64-NOBMI2-LABEL: clear_lowbits16_c4_commutative: 288; X64-NOBMI2: # %bb.0: 289; X64-NOBMI2-NEXT: movl %esi, %ecx 290; X64-NOBMI2-NEXT: movzwl %di, %eax 291; X64-NOBMI2-NEXT: shrl %cl, %eax 292; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 293; X64-NOBMI2-NEXT: shll %cl, %eax 294; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 295; X64-NOBMI2-NEXT: retq 296; 297; X64-BMI2-LABEL: clear_lowbits16_c4_commutative: 298; X64-BMI2: # %bb.0: 299; X64-BMI2-NEXT: movzwl %di, %eax 300; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 301; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 302; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 303; X64-BMI2-NEXT: retq 304 %mask = shl i16 -1, %numlowbits 305 
%masked = and i16 %val, %mask ; swapped order 306 ret i16 %masked 307} 308 309; 32-bit 310 311define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind { 312; X86-NOBMI2-LABEL: clear_lowbits32_c0: 313; X86-NOBMI2: # %bb.0: 314; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 315; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 316; X86-NOBMI2-NEXT: shrl %cl, %eax 317; X86-NOBMI2-NEXT: shll %cl, %eax 318; X86-NOBMI2-NEXT: retl 319; 320; X86-BMI2-LABEL: clear_lowbits32_c0: 321; X86-BMI2: # %bb.0: 322; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 323; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 324; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 325; X86-BMI2-NEXT: retl 326; 327; X64-NOBMI2-LABEL: clear_lowbits32_c0: 328; X64-NOBMI2: # %bb.0: 329; X64-NOBMI2-NEXT: movl %esi, %ecx 330; X64-NOBMI2-NEXT: shrl %cl, %edi 331; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 332; X64-NOBMI2-NEXT: shll %cl, %edi 333; X64-NOBMI2-NEXT: movl %edi, %eax 334; X64-NOBMI2-NEXT: retq 335; 336; X64-BMI2-LABEL: clear_lowbits32_c0: 337; X64-BMI2: # %bb.0: 338; X64-BMI2-NEXT: shrxl %esi, %edi, %eax 339; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 340; X64-BMI2-NEXT: retq 341 %mask = shl i32 -1, %numlowbits 342 %masked = and i32 %mask, %val 343 ret i32 %masked 344} 345 346define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { 347; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext: 348; X86-NOBMI2: # %bb.0: 349; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 350; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 351; X86-NOBMI2-NEXT: shrl %cl, %eax 352; X86-NOBMI2-NEXT: shll %cl, %eax 353; X86-NOBMI2-NEXT: retl 354; 355; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext: 356; X86-BMI2: # %bb.0: 357; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 358; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 359; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 360; X86-BMI2-NEXT: retl 361; 362; X64-NOBMI2-LABEL: clear_lowbits32_c1_indexzext: 363; X64-NOBMI2: # %bb.0: 364; X64-NOBMI2-NEXT: movl %esi, %ecx 
365; X64-NOBMI2-NEXT: shrl %cl, %edi 366; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 367; X64-NOBMI2-NEXT: shll %cl, %edi 368; X64-NOBMI2-NEXT: movl %edi, %eax 369; X64-NOBMI2-NEXT: retq 370; 371; X64-BMI2-LABEL: clear_lowbits32_c1_indexzext: 372; X64-BMI2: # %bb.0: 373; X64-BMI2-NEXT: shrxl %esi, %edi, %eax 374; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 375; X64-BMI2-NEXT: retq 376 %sh_prom = zext i8 %numlowbits to i32 377 %mask = shl i32 -1, %sh_prom 378 %masked = and i32 %mask, %val 379 ret i32 %masked 380} 381 382define i32 @clear_lowbits32_c2_load(i32* %w, i32 %numlowbits) nounwind { 383; X86-NOBMI2-LABEL: clear_lowbits32_c2_load: 384; X86-NOBMI2: # %bb.0: 385; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 386; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 387; X86-NOBMI2-NEXT: movl (%eax), %eax 388; X86-NOBMI2-NEXT: shrl %cl, %eax 389; X86-NOBMI2-NEXT: shll %cl, %eax 390; X86-NOBMI2-NEXT: retl 391; 392; X86-BMI2-LABEL: clear_lowbits32_c2_load: 393; X86-BMI2: # %bb.0: 394; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 395; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 396; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax 397; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 398; X86-BMI2-NEXT: retl 399; 400; X64-NOBMI2-LABEL: clear_lowbits32_c2_load: 401; X64-NOBMI2: # %bb.0: 402; X64-NOBMI2-NEXT: movl %esi, %ecx 403; X64-NOBMI2-NEXT: movl (%rdi), %eax 404; X64-NOBMI2-NEXT: shrl %cl, %eax 405; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 406; X64-NOBMI2-NEXT: shll %cl, %eax 407; X64-NOBMI2-NEXT: retq 408; 409; X64-BMI2-LABEL: clear_lowbits32_c2_load: 410; X64-BMI2: # %bb.0: 411; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax 412; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 413; X64-BMI2-NEXT: retq 414 %val = load i32, i32* %w 415 %mask = shl i32 -1, %numlowbits 416 %masked = and i32 %mask, %val 417 ret i32 %masked 418} 419 420define i32 @clear_lowbits32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { 421; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext: 422; X86-NOBMI2: # 
%bb.0: 423; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 424; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 425; X86-NOBMI2-NEXT: movl (%eax), %eax 426; X86-NOBMI2-NEXT: shrl %cl, %eax 427; X86-NOBMI2-NEXT: shll %cl, %eax 428; X86-NOBMI2-NEXT: retl 429; 430; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext: 431; X86-BMI2: # %bb.0: 432; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 433; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 434; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax 435; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 436; X86-BMI2-NEXT: retl 437; 438; X64-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext: 439; X64-NOBMI2: # %bb.0: 440; X64-NOBMI2-NEXT: movl %esi, %ecx 441; X64-NOBMI2-NEXT: movl (%rdi), %eax 442; X64-NOBMI2-NEXT: shrl %cl, %eax 443; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 444; X64-NOBMI2-NEXT: shll %cl, %eax 445; X64-NOBMI2-NEXT: retq 446; 447; X64-BMI2-LABEL: clear_lowbits32_c3_load_indexzext: 448; X64-BMI2: # %bb.0: 449; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax 450; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 451; X64-BMI2-NEXT: retq 452 %val = load i32, i32* %w 453 %sh_prom = zext i8 %numlowbits to i32 454 %mask = shl i32 -1, %sh_prom 455 %masked = and i32 %mask, %val 456 ret i32 %masked 457} 458 459define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { 460; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative: 461; X86-NOBMI2: # %bb.0: 462; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 463; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 464; X86-NOBMI2-NEXT: shrl %cl, %eax 465; X86-NOBMI2-NEXT: shll %cl, %eax 466; X86-NOBMI2-NEXT: retl 467; 468; X86-BMI2-LABEL: clear_lowbits32_c4_commutative: 469; X86-BMI2: # %bb.0: 470; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 471; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx 472; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax 473; X86-BMI2-NEXT: retl 474; 475; X64-NOBMI2-LABEL: clear_lowbits32_c4_commutative: 476; X64-NOBMI2: # %bb.0: 477; X64-NOBMI2-NEXT: movl %esi, %ecx 478; X64-NOBMI2-NEXT: 
shrl %cl, %edi 479; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 480; X64-NOBMI2-NEXT: shll %cl, %edi 481; X64-NOBMI2-NEXT: movl %edi, %eax 482; X64-NOBMI2-NEXT: retq 483; 484; X64-BMI2-LABEL: clear_lowbits32_c4_commutative: 485; X64-BMI2: # %bb.0: 486; X64-BMI2-NEXT: shrxl %esi, %edi, %eax 487; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 488; X64-BMI2-NEXT: retq 489 %mask = shl i32 -1, %numlowbits 490 %masked = and i32 %val, %mask ; swapped order 491 ret i32 %masked 492} 493 494; 64-bit 495 496define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind { 497; X86-NOBMI2-LABEL: clear_lowbits64_c0: 498; X86-NOBMI2: # %bb.0: 499; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 500; X86-NOBMI2-NEXT: movl $-1, %edx 501; X86-NOBMI2-NEXT: movl $-1, %eax 502; X86-NOBMI2-NEXT: shll %cl, %eax 503; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 504; X86-NOBMI2-NEXT: testb $32, %cl 505; X86-NOBMI2-NEXT: je .LBB13_2 506; X86-NOBMI2-NEXT: # %bb.1: 507; X86-NOBMI2-NEXT: movl %eax, %edx 508; X86-NOBMI2-NEXT: xorl %eax, %eax 509; X86-NOBMI2-NEXT: .LBB13_2: 510; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 511; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 512; X86-NOBMI2-NEXT: retl 513; 514; X86-BMI2-LABEL: clear_lowbits64_c0: 515; X86-BMI2: # %bb.0: 516; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 517; X86-BMI2-NEXT: movl $-1, %edx 518; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 519; X86-BMI2-NEXT: shldl %cl, %edx, %edx 520; X86-BMI2-NEXT: testb $32, %cl 521; X86-BMI2-NEXT: je .LBB13_2 522; X86-BMI2-NEXT: # %bb.1: 523; X86-BMI2-NEXT: movl %eax, %edx 524; X86-BMI2-NEXT: xorl %eax, %eax 525; X86-BMI2-NEXT: .LBB13_2: 526; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 527; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 528; X86-BMI2-NEXT: retl 529; 530; X64-NOBMI2-LABEL: clear_lowbits64_c0: 531; X64-NOBMI2: # %bb.0: 532; X64-NOBMI2-NEXT: movq %rsi, %rcx 533; X64-NOBMI2-NEXT: shrq %cl, %rdi 534; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 535; X64-NOBMI2-NEXT: shlq %cl, %rdi 536; 
X64-NOBMI2-NEXT: movq %rdi, %rax 537; X64-NOBMI2-NEXT: retq 538; 539; X64-BMI2-LABEL: clear_lowbits64_c0: 540; X64-BMI2: # %bb.0: 541; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax 542; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax 543; X64-BMI2-NEXT: retq 544 %mask = shl i64 -1, %numlowbits 545 %masked = and i64 %mask, %val 546 ret i64 %masked 547} 548 549define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { 550; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: 551; X86-NOBMI2: # %bb.0: 552; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 553; X86-NOBMI2-NEXT: movl $-1, %edx 554; X86-NOBMI2-NEXT: movl $-1, %eax 555; X86-NOBMI2-NEXT: shll %cl, %eax 556; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 557; X86-NOBMI2-NEXT: testb $32, %cl 558; X86-NOBMI2-NEXT: je .LBB14_2 559; X86-NOBMI2-NEXT: # %bb.1: 560; X86-NOBMI2-NEXT: movl %eax, %edx 561; X86-NOBMI2-NEXT: xorl %eax, %eax 562; X86-NOBMI2-NEXT: .LBB14_2: 563; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 564; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 565; X86-NOBMI2-NEXT: retl 566; 567; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext: 568; X86-BMI2: # %bb.0: 569; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 570; X86-BMI2-NEXT: movl $-1, %edx 571; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 572; X86-BMI2-NEXT: shldl %cl, %edx, %edx 573; X86-BMI2-NEXT: testb $32, %cl 574; X86-BMI2-NEXT: je .LBB14_2 575; X86-BMI2-NEXT: # %bb.1: 576; X86-BMI2-NEXT: movl %eax, %edx 577; X86-BMI2-NEXT: xorl %eax, %eax 578; X86-BMI2-NEXT: .LBB14_2: 579; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 580; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 581; X86-BMI2-NEXT: retl 582; 583; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: 584; X64-NOBMI2: # %bb.0: 585; X64-NOBMI2-NEXT: movl %esi, %ecx 586; X64-NOBMI2-NEXT: shrq %cl, %rdi 587; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 588; X64-NOBMI2-NEXT: shlq %cl, %rdi 589; X64-NOBMI2-NEXT: movq %rdi, %rax 590; X64-NOBMI2-NEXT: retq 591; 592; X64-BMI2-LABEL: clear_lowbits64_c1_indexzext: 593; X64-BMI2: 
# %bb.0: 594; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 595; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax 596; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax 597; X64-BMI2-NEXT: retq 598 %sh_prom = zext i8 %numlowbits to i64 599 %mask = shl i64 -1, %sh_prom 600 %masked = and i64 %mask, %val 601 ret i64 %masked 602} 603 604define i64 @clear_lowbits64_c2_load(i64* %w, i64 %numlowbits) nounwind { 605; X86-NOBMI2-LABEL: clear_lowbits64_c2_load: 606; X86-NOBMI2: # %bb.0: 607; X86-NOBMI2-NEXT: pushl %esi 608; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 609; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 610; X86-NOBMI2-NEXT: movl $-1, %edx 611; X86-NOBMI2-NEXT: movl $-1, %eax 612; X86-NOBMI2-NEXT: shll %cl, %eax 613; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 614; X86-NOBMI2-NEXT: testb $32, %cl 615; X86-NOBMI2-NEXT: je .LBB15_2 616; X86-NOBMI2-NEXT: # %bb.1: 617; X86-NOBMI2-NEXT: movl %eax, %edx 618; X86-NOBMI2-NEXT: xorl %eax, %eax 619; X86-NOBMI2-NEXT: .LBB15_2: 620; X86-NOBMI2-NEXT: andl 4(%esi), %edx 621; X86-NOBMI2-NEXT: andl (%esi), %eax 622; X86-NOBMI2-NEXT: popl %esi 623; X86-NOBMI2-NEXT: retl 624; 625; X86-BMI2-LABEL: clear_lowbits64_c2_load: 626; X86-BMI2: # %bb.0: 627; X86-BMI2-NEXT: pushl %esi 628; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 629; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 630; X86-BMI2-NEXT: movl $-1, %edx 631; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 632; X86-BMI2-NEXT: shldl %cl, %edx, %edx 633; X86-BMI2-NEXT: testb $32, %cl 634; X86-BMI2-NEXT: je .LBB15_2 635; X86-BMI2-NEXT: # %bb.1: 636; X86-BMI2-NEXT: movl %eax, %edx 637; X86-BMI2-NEXT: xorl %eax, %eax 638; X86-BMI2-NEXT: .LBB15_2: 639; X86-BMI2-NEXT: andl 4(%esi), %edx 640; X86-BMI2-NEXT: andl (%esi), %eax 641; X86-BMI2-NEXT: popl %esi 642; X86-BMI2-NEXT: retl 643; 644; X64-NOBMI2-LABEL: clear_lowbits64_c2_load: 645; X64-NOBMI2: # %bb.0: 646; X64-NOBMI2-NEXT: movq %rsi, %rcx 647; X64-NOBMI2-NEXT: movq (%rdi), %rax 648; X64-NOBMI2-NEXT: shrq %cl, %rax 649; X64-NOBMI2-NEXT: # kill: def $cl killed $cl 
killed $rcx 650; X64-NOBMI2-NEXT: shlq %cl, %rax 651; X64-NOBMI2-NEXT: retq 652; 653; X64-BMI2-LABEL: clear_lowbits64_c2_load: 654; X64-BMI2: # %bb.0: 655; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax 656; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax 657; X64-BMI2-NEXT: retq 658 %val = load i64, i64* %w 659 %mask = shl i64 -1, %numlowbits 660 %masked = and i64 %mask, %val 661 ret i64 %masked 662} 663 664define i64 @clear_lowbits64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { 665; X86-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext: 666; X86-NOBMI2: # %bb.0: 667; X86-NOBMI2-NEXT: pushl %esi 668; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 669; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 670; X86-NOBMI2-NEXT: movl $-1, %edx 671; X86-NOBMI2-NEXT: movl $-1, %eax 672; X86-NOBMI2-NEXT: shll %cl, %eax 673; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 674; X86-NOBMI2-NEXT: testb $32, %cl 675; X86-NOBMI2-NEXT: je .LBB16_2 676; X86-NOBMI2-NEXT: # %bb.1: 677; X86-NOBMI2-NEXT: movl %eax, %edx 678; X86-NOBMI2-NEXT: xorl %eax, %eax 679; X86-NOBMI2-NEXT: .LBB16_2: 680; X86-NOBMI2-NEXT: andl 4(%esi), %edx 681; X86-NOBMI2-NEXT: andl (%esi), %eax 682; X86-NOBMI2-NEXT: popl %esi 683; X86-NOBMI2-NEXT: retl 684; 685; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext: 686; X86-BMI2: # %bb.0: 687; X86-BMI2-NEXT: pushl %esi 688; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 689; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 690; X86-BMI2-NEXT: movl $-1, %edx 691; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 692; X86-BMI2-NEXT: shldl %cl, %edx, %edx 693; X86-BMI2-NEXT: testb $32, %cl 694; X86-BMI2-NEXT: je .LBB16_2 695; X86-BMI2-NEXT: # %bb.1: 696; X86-BMI2-NEXT: movl %eax, %edx 697; X86-BMI2-NEXT: xorl %eax, %eax 698; X86-BMI2-NEXT: .LBB16_2: 699; X86-BMI2-NEXT: andl 4(%esi), %edx 700; X86-BMI2-NEXT: andl (%esi), %eax 701; X86-BMI2-NEXT: popl %esi 702; X86-BMI2-NEXT: retl 703; 704; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext: 705; X64-NOBMI2: # %bb.0: 706; X64-NOBMI2-NEXT: movl %esi, %ecx 707; 
X64-NOBMI2-NEXT: movq (%rdi), %rax 708; X64-NOBMI2-NEXT: shrq %cl, %rax 709; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 710; X64-NOBMI2-NEXT: shlq %cl, %rax 711; X64-NOBMI2-NEXT: retq 712; 713; X64-BMI2-LABEL: clear_lowbits64_c3_load_indexzext: 714; X64-BMI2: # %bb.0: 715; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 716; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax 717; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax 718; X64-BMI2-NEXT: retq 719 %val = load i64, i64* %w 720 %sh_prom = zext i8 %numlowbits to i64 721 %mask = shl i64 -1, %sh_prom 722 %masked = and i64 %mask, %val 723 ret i64 %masked 724} 725 726define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { 727; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative: 728; X86-NOBMI2: # %bb.0: 729; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 730; X86-NOBMI2-NEXT: movl $-1, %edx 731; X86-NOBMI2-NEXT: movl $-1, %eax 732; X86-NOBMI2-NEXT: shll %cl, %eax 733; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx 734; X86-NOBMI2-NEXT: testb $32, %cl 735; X86-NOBMI2-NEXT: je .LBB17_2 736; X86-NOBMI2-NEXT: # %bb.1: 737; X86-NOBMI2-NEXT: movl %eax, %edx 738; X86-NOBMI2-NEXT: xorl %eax, %eax 739; X86-NOBMI2-NEXT: .LBB17_2: 740; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 741; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 742; X86-NOBMI2-NEXT: retl 743; 744; X86-BMI2-LABEL: clear_lowbits64_c4_commutative: 745; X86-BMI2: # %bb.0: 746; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 747; X86-BMI2-NEXT: movl $-1, %edx 748; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax 749; X86-BMI2-NEXT: shldl %cl, %edx, %edx 750; X86-BMI2-NEXT: testb $32, %cl 751; X86-BMI2-NEXT: je .LBB17_2 752; X86-BMI2-NEXT: # %bb.1: 753; X86-BMI2-NEXT: movl %eax, %edx 754; X86-BMI2-NEXT: xorl %eax, %eax 755; X86-BMI2-NEXT: .LBB17_2: 756; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 757; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 758; X86-BMI2-NEXT: retl 759; 760; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative: 761; X64-NOBMI2: # %bb.0: 762; 
X64-NOBMI2-NEXT: movq %rsi, %rcx 763; X64-NOBMI2-NEXT: shrq %cl, %rdi 764; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 765; X64-NOBMI2-NEXT: shlq %cl, %rdi 766; X64-NOBMI2-NEXT: movq %rdi, %rax 767; X64-NOBMI2-NEXT: retq 768; 769; X64-BMI2-LABEL: clear_lowbits64_c4_commutative: 770; X64-BMI2: # %bb.0: 771; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax 772; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax 773; X64-BMI2-NEXT: retq 774 %mask = shl i64 -1, %numlowbits 775 %masked = and i64 %val, %mask ; swapped order 776 ret i64 %masked 777} 778 779; ---------------------------------------------------------------------------- ; 780; Pattern ic. 781; ---------------------------------------------------------------------------- ; 782 783; 8-bit 784 785define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind { 786; X86-LABEL: clear_lowbits8_ic0: 787; X86: # %bb.0: 788; X86-NEXT: movb {{[0-9]+}}(%esp), %al 789; X86-NEXT: movb $8, %cl 790; X86-NEXT: subb {{[0-9]+}}(%esp), %cl 791; X86-NEXT: shrb %cl, %al 792; X86-NEXT: shlb %cl, %al 793; X86-NEXT: retl 794; 795; X64-LABEL: clear_lowbits8_ic0: 796; X64: # %bb.0: 797; X64-NEXT: movb $8, %cl 798; X64-NEXT: subb %sil, %cl 799; X64-NEXT: shrb %cl, %dil 800; X64-NEXT: shlb %cl, %dil 801; X64-NEXT: movl %edi, %eax 802; X64-NEXT: retq 803 %numhighbits = sub i8 8, %numlowbits 804 %mask = shl i8 -1, %numhighbits 805 %masked = and i8 %mask, %val 806 ret i8 %masked 807} 808 809define i8 @clear_lowbits8_ic2_load(i8* %w, i8 %numlowbits) nounwind { 810; X86-LABEL: clear_lowbits8_ic2_load: 811; X86: # %bb.0: 812; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 813; X86-NEXT: movb (%eax), %al 814; X86-NEXT: movb $8, %cl 815; X86-NEXT: subb {{[0-9]+}}(%esp), %cl 816; X86-NEXT: shrb %cl, %al 817; X86-NEXT: shlb %cl, %al 818; X86-NEXT: retl 819; 820; X64-LABEL: clear_lowbits8_ic2_load: 821; X64: # %bb.0: 822; X64-NEXT: movb (%rdi), %al 823; X64-NEXT: movb $8, %cl 824; X64-NEXT: subb %sil, %cl 825; X64-NEXT: shrb %cl, %al 826; X64-NEXT: shlb %cl, %al 827; 
X64-NEXT: retq
  %val = load i8, i8* %w
  %numhighbits = sub i8 8, %numlowbits
  %mask = shl i8 -1, %numhighbits
  %masked = and i8 %mask, %val
  ret i8 %masked
}

; Pattern ic, 8-bit: mask = -1 << (8 - numlowbits), with the mask as the
; second operand of the 'and'. The expected code is a shrb/shlb pair using the
; same shift count.
define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic4_commutative:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: clear_lowbits8_ic4_commutative:
; X64: # %bb.0:
; X64-NEXT: movb $8, %cl
; X64-NEXT: subb %sil, %cl
; X64-NEXT: shrb %cl, %dil
; X64-NEXT: shlb %cl, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %numhighbits = sub i8 8, %numlowbits
  %mask = shl i8 -1, %numhighbits
  %masked = and i8 %val, %mask ; swapped order
  ret i8 %masked
}

; 16-bit

; Pattern ic, 16-bit: mask = -1 << (16 - numlowbits), value and bit count both
; passed as i16 arguments. The expected code zero-extends the value (movzwl)
; and uses a 32-bit shift-right/shift-left pair (shrx/shlx in the BMI2 runs).
define i16 @clear_lowbits16_ic0(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movw $16, %cx
; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movw $16, %cx
; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: movl $16, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: movl $16, %ecx
; X64-BMI2-NEXT: subl %esi, %ecx
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i16 16, %numlowbits
  %mask = shl i16 -1, %numhighbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

; Pattern ic, 16-bit, i8 bit count: the subtraction from 16 is done in i8 and
; the result is zero-extended to i16 before being used as the shift amount.
define i16 @clear_lowbits16_ic1_indexzext(i16 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb $16, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: movb $16, %cl
; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i8 16, %numlowbits
  %sh_prom = zext i8 %numhighbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

; Pattern ic, 16-bit, value loaded from memory through %w instead of being
; passed directly.
define i16 @clear_lowbits16_ic2_load(i16* %w, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: movw $16, %cx
; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzwl (%eax), %eax
; X86-BMI2-NEXT: movw $16, %cx
; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: movl $16, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movl $16, %ecx
; X64-BMI2-NEXT: subl %esi, %ecx
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %numhighbits = sub i16 16, %numlowbits
  %mask = shl i16 -1, %numhighbits
  %masked = and i16 %mask, %val
  ret i16 %masked
}

; Pattern ic, 16-bit, combining both variations: the value is loaded through
; %w and the bit count is an i8 that is zero-extended after the subtraction.
define i16 @clear_lowbits16_ic3_load_indexzext(i16* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzwl (%eax), %eax
; X86-BMI2-NEXT: movb $16, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
; X64-BMI2-NEXT: movb $16, %cl
; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %val = load i16, i16* %w
  %numhighbits = sub i8 16, %numlowbits
  %sh_prom = zext i8 %numhighbits to i16
  %mask = shl i16 -1, %sh_prom
  %masked = and i16 %mask, %val
  ret i16 %masked
}

; Pattern ic, 16-bit, with the 'and' operands swapped (value first, mask
; second).
define i16 @clear_lowbits16_ic4_commutative(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movw $16, %cx
; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits16_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movw $16, %cx
; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
; X64-NOBMI2-NEXT: movl $16, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits16_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
; X64-BMI2-NEXT: movl $16, %ecx
; X64-BMI2-NEXT: subl %esi, %ecx
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i16 16, %numlowbits
  %mask = shl i16 -1, %numhighbits
  %masked = and i16 %val, %mask ; swapped order
  ret i16 %masked
}

; 32-bit

; Pattern ic, 32-bit: mask = -1 << (32 - numlowbits), value and bit count both
; passed as i32 arguments. The expected code is a shrl/shll pair, or a
; shrxl/shlxl pair in the BMI2 runs.
define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $32, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 32, %numlowbits
  %mask = shl i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern ic, 32-bit, i8 bit count: the subtraction from 32 is done in i8 and
; the result is zero-extended to i32 before being used as the shift amount.
define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $32, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb $32, %al
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $32, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $32, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = shl i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern ic, 32-bit, value loaded from memory through %w. In the BMI2 runs
; the expected shrxl takes the memory location directly as its source operand.
define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: retq
  %val = load i32, i32* %w
  %numhighbits = sub i32 32, %numlowbits
  %mask = shl i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern ic, 32-bit, combining both variations: the value is loaded through
; %w and the bit count is an i8 that is zero-extended after the subtraction.
define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: movb $32, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb $32, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: movb $32, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $32, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: retq
  %val = load i32, i32* %w
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = shl i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern ic, 32-bit, with the 'and' operands swapped (value first, mask
; second).
define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $32, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 32, %numlowbits
  %mask = shl i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

; Pattern ic, 64-bit: mask = -1 << (64 - numlowbits). On x86-64 the expected
; code is a shrq/shlq pair (shrxq/shlxq in the BMI2 runs). On i686 the expected
; code instead builds the 64-bit mask in two registers (shll or shlxl, shldl,
; and a `testb $32` branch) and then ands each half with the value.
define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl $64, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB31_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB31_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $64, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB31_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB31_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = shl i64 -1, %numhighbits
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern ic, 64-bit, i8 bit count: the subtraction from 64 is done in i8 and
; the result is zero-extended to i64 before being used as the shift amount.
define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movb $64, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB32_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB32_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb $64, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB32_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB32_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $64, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $64, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %mask = shl i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern ic, 64-bit, value loaded from memory through %w. In the i686 runs the
; expected code ands the two mask halves with the two 32-bit words at (%esi)
; and 4(%esi).
define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic2_load:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movl $64, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB33_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB33_2:
; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl $64, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB33_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB33_2:
; X86-BMI2-NEXT: andl 4(%esi), %edx
; X86-BMI2-NEXT: andl (%esi), %eax
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i64 64, %numlowbits
  %mask = shl i64 -1, %numhighbits
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern ic, 64-bit, combining both variations: the value is loaded through
; %w and the bit count is an i8 that is zero-extended after the subtraction.
define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movb $64, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB34_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB34_2:
; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movb $64, %cl
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB34_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB34_2:
; X86-BMI2-NEXT: andl 4(%esi), %edx
; X86-BMI2-NEXT: andl (%esi), %eax
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: movb $64, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $64, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %mask = shl i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern ic, 64-bit, with the 'and' operands swapped (value first, mask
; second).
define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl $64, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB35_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB35_2:
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $64, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB35_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB35_2:
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = shl i64 -1, %numhighbits
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Multi-use tests
; ---------------------------------------------------------------------------- ;

; External sinks that give the mask a second user in the tests below.
declare void @use32(i32)
declare void @use64(i64)

; 32-bit mask (-1 << numlowbits) that is passed to @use32 before being and-ed
; with the value, so the mask has a user other than the 'and'. The expected
; code computes the mask once with shll (shlxl in the BMI2 runs), passes it to
; the call, and then uses andl; there is no shift-right/shift-left pair.
define i32 @oneuse32(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: oneuse32:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: subl $8, %esp
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %esi
; X86-NOBMI2-NEXT: shll %cl, %esi
; X86-NOBMI2-NEXT: movl %esi, (%esp)
; X86-NOBMI2-NEXT: calll use32
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movl %esi, %eax
; X86-NOBMI2-NEXT: addl $8, %esp
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse32:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %esi
; X86-BMI2-NEXT: movl %esi, (%esp)
; X86-BMI2-NEXT: calll use32
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %esi, %eax
; X86-BMI2-NEXT: addl $8, %esp
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse32:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: pushq %rbp
; X64-NOBMI2-NEXT: pushq %rbx
; X64-NOBMI2-NEXT: pushq %rax
; X64-NOBMI2-NEXT: movl %edi, %ebx
; X64-NOBMI2-NEXT: movl $-1, %ebp
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shll %cl, %ebp
; X64-NOBMI2-NEXT: movl %ebp, %edi
; X64-NOBMI2-NEXT: callq use32
; X64-NOBMI2-NEXT: andl %ebx, %ebp
; X64-NOBMI2-NEXT: movl %ebp, %eax
; X64-NOBMI2-NEXT: addq $8, %rsp
; X64-NOBMI2-NEXT: popq %rbx
; X64-NOBMI2-NEXT: popq %rbp
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse32:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: pushq %rbp
; X64-BMI2-NEXT: pushq %rbx
; X64-BMI2-NEXT: pushq %rax
; X64-BMI2-NEXT: movl %edi, %ebx
; X64-BMI2-NEXT: movl $-1, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %ebp
; X64-BMI2-NEXT: movl %ebp, %edi
; X64-BMI2-NEXT: callq use32
; X64-BMI2-NEXT: andl %ebx, %ebp
; X64-BMI2-NEXT: movl %ebp, %eax
; X64-BMI2-NEXT: addq $8, %rsp
; X64-BMI2-NEXT: popq %rbx
; X64-BMI2-NEXT: popq %rbp
; X64-BMI2-NEXT: retq
  %mask = shl i32 -1, %numlowbits
  call void @use32(i32 %mask)
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; 64-bit counterpart of oneuse32: the mask is passed to @use64 before the
; 'and'. The expected code materializes the mask (two registers in the i686
; runs, one shlq or shlxq in the x86-64 runs), calls use64, then ands it with
; the value.
define i64 @oneuse64(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: oneuse64:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %edi
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: pushl %eax
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %esi
; X86-NOBMI2-NEXT: movl $-1, %edi
; X86-NOBMI2-NEXT: shll %cl, %edi
; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB37_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %edi, %esi
; X86-NOBMI2-NEXT: xorl %edi, %edi
; X86-NOBMI2-NEXT: .LBB37_2:
; X86-NOBMI2-NEXT: subl $8, %esp
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: pushl %edi
; X86-NOBMI2-NEXT: calll use64
; X86-NOBMI2-NEXT: addl $16, %esp
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-NOBMI2-NEXT: movl %edi, %eax
; X86-NOBMI2-NEXT: movl %esi, %edx
; X86-NOBMI2-NEXT: addl $4, %esp
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: popl %edi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse64:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %esi
; X86-BMI2-NEXT: shlxl %ecx, %esi, %edi
; X86-BMI2-NEXT: shldl %cl, %esi, %esi
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB37_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %edi, %esi
; X86-BMI2-NEXT: xorl %edi, %edi
; X86-BMI2-NEXT: .LBB37_2:
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: calll use64
; X86-BMI2-NEXT: addl $16, %esp
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-BMI2-NEXT: movl %edi, %eax
; X86-BMI2-NEXT: movl %esi, %edx
; X86-BMI2-NEXT: addl $4, %esp
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse64:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: pushq %r14
; X64-NOBMI2-NEXT: pushq %rbx
; X64-NOBMI2-NEXT: pushq %rax
; X64-NOBMI2-NEXT: movq %rdi, %r14
; X64-NOBMI2-NEXT: movq $-1, %rbx
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: shlq %cl, %rbx
; X64-NOBMI2-NEXT: movq %rbx, %rdi
; X64-NOBMI2-NEXT: callq use64
; X64-NOBMI2-NEXT: andq %r14, %rbx
; X64-NOBMI2-NEXT: movq %rbx, %rax
; X64-NOBMI2-NEXT: addq $8, %rsp
; X64-NOBMI2-NEXT: popq %rbx
; X64-NOBMI2-NEXT: popq %r14
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse64:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: pushq %r14
; X64-BMI2-NEXT: pushq %rbx
; X64-BMI2-NEXT: pushq %rax
; X64-BMI2-NEXT: movq %rdi, %r14
; X64-BMI2-NEXT: movq $-1, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rbx
; X64-BMI2-NEXT: movq %rbx, %rdi
; X64-BMI2-NEXT: callq use64
; X64-BMI2-NEXT: andq %r14, %rbx
; X64-BMI2-NEXT: movq %rbx, %rax
; X64-BMI2-NEXT: addq $8, %rsp
; X64-BMI2-NEXT: popq %rbx
; X64-BMI2-NEXT: popq %r14
; X64-BMI2-NEXT: retq
  %mask = shl i64 -1, %numlowbits
  call void @use64(i64 %mask)
  %masked = and i64 %mask, %val
  ret i64 %masked
}