1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI 3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM 4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM 5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI 8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM 9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM 10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 12 13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll 14 15; https://bugs.llvm.org/show_bug.cgi?id=36419 16; https://bugs.llvm.org/show_bug.cgi?id=37603 17; https://bugs.llvm.org/show_bug.cgi?id=37610 18 19; Patterns: 20; a) x & (1 << nbits) - 1 21; b) x & ~(-1 << 
nbits) 22; c) x & (-1 >> (32 - nbits)) 23; d) x << (32 - nbits) >> (32 - nbits) 24; are equivalent. 25 26; ---------------------------------------------------------------------------- ; 27; Pattern a. 32-bit 28; ---------------------------------------------------------------------------- ; 29 30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { 31; X86-NOBMI-LABEL: bzhi32_a0: 32; X86-NOBMI: # %bb.0: 33; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 34; X86-NOBMI-NEXT: movl $1, %eax 35; X86-NOBMI-NEXT: shll %cl, %eax 36; X86-NOBMI-NEXT: decl %eax 37; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 38; X86-NOBMI-NEXT: retl 39; 40; X86-BMI1NOTBM-LABEL: bzhi32_a0: 41; X86-BMI1NOTBM: # %bb.0: 42; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 43; X86-BMI1NOTBM-NEXT: shll $8, %eax 44; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 45; X86-BMI1NOTBM-NEXT: retl 46; 47; X86-BMI1BMI2-LABEL: bzhi32_a0: 48; X86-BMI1BMI2: # %bb.0: 49; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 50; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 51; X86-BMI1BMI2-NEXT: retl 52; 53; X64-NOBMI-LABEL: bzhi32_a0: 54; X64-NOBMI: # %bb.0: 55; X64-NOBMI-NEXT: movl %esi, %ecx 56; X64-NOBMI-NEXT: movl $1, %eax 57; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 58; X64-NOBMI-NEXT: shll %cl, %eax 59; X64-NOBMI-NEXT: decl %eax 60; X64-NOBMI-NEXT: andl %edi, %eax 61; X64-NOBMI-NEXT: retq 62; 63; X64-BMI1NOTBM-LABEL: bzhi32_a0: 64; X64-BMI1NOTBM: # %bb.0: 65; X64-BMI1NOTBM-NEXT: shll $8, %esi 66; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 67; X64-BMI1NOTBM-NEXT: retq 68; 69; X64-BMI1BMI2-LABEL: bzhi32_a0: 70; X64-BMI1BMI2: # %bb.0: 71; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 72; X64-BMI1BMI2-NEXT: retq 73 %onebit = shl i32 1, %numlowbits 74 %mask = add nsw i32 %onebit, -1 75 %masked = and i32 %mask, %val 76 ret i32 %masked 77} 78 79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { 80; X86-NOBMI-LABEL: bzhi32_a1_indexzext: 81; X86-NOBMI: # %bb.0: 82; X86-NOBMI-NEXT: 
movb {{[0-9]+}}(%esp), %cl 83; X86-NOBMI-NEXT: movl $1, %eax 84; X86-NOBMI-NEXT: shll %cl, %eax 85; X86-NOBMI-NEXT: decl %eax 86; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 87; X86-NOBMI-NEXT: retl 88; 89; X86-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: 90; X86-BMI1NOTBM: # %bb.0: 91; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 92; X86-BMI1NOTBM-NEXT: shll $8, %eax 93; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 94; X86-BMI1NOTBM-NEXT: retl 95; 96; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext: 97; X86-BMI1BMI2: # %bb.0: 98; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 99; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 100; X86-BMI1BMI2-NEXT: retl 101; 102; X64-NOBMI-LABEL: bzhi32_a1_indexzext: 103; X64-NOBMI: # %bb.0: 104; X64-NOBMI-NEXT: movl %esi, %ecx 105; X64-NOBMI-NEXT: movl $1, %eax 106; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 107; X64-NOBMI-NEXT: shll %cl, %eax 108; X64-NOBMI-NEXT: decl %eax 109; X64-NOBMI-NEXT: andl %edi, %eax 110; X64-NOBMI-NEXT: retq 111; 112; X64-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: 113; X64-BMI1NOTBM: # %bb.0: 114; X64-BMI1NOTBM-NEXT: shll $8, %esi 115; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 116; X64-BMI1NOTBM-NEXT: retq 117; 118; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext: 119; X64-BMI1BMI2: # %bb.0: 120; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 121; X64-BMI1BMI2-NEXT: retq 122 %conv = zext i8 %numlowbits to i32 123 %onebit = shl i32 1, %conv 124 %mask = add nsw i32 %onebit, -1 125 %masked = and i32 %mask, %val 126 ret i32 %masked 127} 128 129define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { 130; X86-NOBMI-LABEL: bzhi32_a2_load: 131; X86-NOBMI: # %bb.0: 132; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 133; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 134; X86-NOBMI-NEXT: movl $1, %eax 135; X86-NOBMI-NEXT: shll %cl, %eax 136; X86-NOBMI-NEXT: decl %eax 137; X86-NOBMI-NEXT: andl (%edx), %eax 138; X86-NOBMI-NEXT: retl 139; 140; X86-BMI1NOTBM-LABEL: bzhi32_a2_load: 141; X86-BMI1NOTBM: # %bb.0: 
142; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 143; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 144; X86-BMI1NOTBM-NEXT: shll $8, %ecx 145; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax 146; X86-BMI1NOTBM-NEXT: retl 147; 148; X86-BMI1BMI2-LABEL: bzhi32_a2_load: 149; X86-BMI1BMI2: # %bb.0: 150; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 151; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 152; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 153; X86-BMI1BMI2-NEXT: retl 154; 155; X64-NOBMI-LABEL: bzhi32_a2_load: 156; X64-NOBMI: # %bb.0: 157; X64-NOBMI-NEXT: movl %esi, %ecx 158; X64-NOBMI-NEXT: movl $1, %eax 159; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 160; X64-NOBMI-NEXT: shll %cl, %eax 161; X64-NOBMI-NEXT: decl %eax 162; X64-NOBMI-NEXT: andl (%rdi), %eax 163; X64-NOBMI-NEXT: retq 164; 165; X64-BMI1NOTBM-LABEL: bzhi32_a2_load: 166; X64-BMI1NOTBM: # %bb.0: 167; X64-BMI1NOTBM-NEXT: shll $8, %esi 168; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax 169; X64-BMI1NOTBM-NEXT: retq 170; 171; X64-BMI1BMI2-LABEL: bzhi32_a2_load: 172; X64-BMI1BMI2: # %bb.0: 173; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 174; X64-BMI1BMI2-NEXT: retq 175 %val = load i32, i32* %w 176 %onebit = shl i32 1, %numlowbits 177 %mask = add nsw i32 %onebit, -1 178 %masked = and i32 %mask, %val 179 ret i32 %masked 180} 181 182define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { 183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext: 184; X86-NOBMI: # %bb.0: 185; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 186; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 187; X86-NOBMI-NEXT: movl $1, %eax 188; X86-NOBMI-NEXT: shll %cl, %eax 189; X86-NOBMI-NEXT: decl %eax 190; X86-NOBMI-NEXT: andl (%edx), %eax 191; X86-NOBMI-NEXT: retl 192; 193; X86-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: 194; X86-BMI1NOTBM: # %bb.0: 195; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 196; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 197; X86-BMI1NOTBM-NEXT: shll $8, %ecx 198; 
X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax 199; X86-BMI1NOTBM-NEXT: retl 200; 201; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: 202; X86-BMI1BMI2: # %bb.0: 203; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 204; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 205; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 206; X86-BMI1BMI2-NEXT: retl 207; 208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: 209; X64-NOBMI: # %bb.0: 210; X64-NOBMI-NEXT: movl %esi, %ecx 211; X64-NOBMI-NEXT: movl $1, %eax 212; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 213; X64-NOBMI-NEXT: shll %cl, %eax 214; X64-NOBMI-NEXT: decl %eax 215; X64-NOBMI-NEXT: andl (%rdi), %eax 216; X64-NOBMI-NEXT: retq 217; 218; X64-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: 219; X64-BMI1NOTBM: # %bb.0: 220; X64-BMI1NOTBM-NEXT: shll $8, %esi 221; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax 222; X64-BMI1NOTBM-NEXT: retq 223; 224; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: 225; X64-BMI1BMI2: # %bb.0: 226; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 227; X64-BMI1BMI2-NEXT: retq 228 %val = load i32, i32* %w 229 %conv = zext i8 %numlowbits to i32 230 %onebit = shl i32 1, %conv 231 %mask = add nsw i32 %onebit, -1 232 %masked = and i32 %mask, %val 233 ret i32 %masked 234} 235 236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { 237; X86-NOBMI-LABEL: bzhi32_a4_commutative: 238; X86-NOBMI: # %bb.0: 239; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 240; X86-NOBMI-NEXT: movl $1, %eax 241; X86-NOBMI-NEXT: shll %cl, %eax 242; X86-NOBMI-NEXT: decl %eax 243; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 244; X86-NOBMI-NEXT: retl 245; 246; X86-BMI1NOTBM-LABEL: bzhi32_a4_commutative: 247; X86-BMI1NOTBM: # %bb.0: 248; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 249; X86-BMI1NOTBM-NEXT: shll $8, %eax 250; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 251; X86-BMI1NOTBM-NEXT: retl 252; 253; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative: 254; X86-BMI1BMI2: # %bb.0: 255; X86-BMI1BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %al 256; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 257; X86-BMI1BMI2-NEXT: retl 258; 259; X64-NOBMI-LABEL: bzhi32_a4_commutative: 260; X64-NOBMI: # %bb.0: 261; X64-NOBMI-NEXT: movl %esi, %ecx 262; X64-NOBMI-NEXT: movl $1, %eax 263; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 264; X64-NOBMI-NEXT: shll %cl, %eax 265; X64-NOBMI-NEXT: decl %eax 266; X64-NOBMI-NEXT: andl %edi, %eax 267; X64-NOBMI-NEXT: retq 268; 269; X64-BMI1NOTBM-LABEL: bzhi32_a4_commutative: 270; X64-BMI1NOTBM: # %bb.0: 271; X64-BMI1NOTBM-NEXT: shll $8, %esi 272; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 273; X64-BMI1NOTBM-NEXT: retq 274; 275; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative: 276; X64-BMI1BMI2: # %bb.0: 277; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 278; X64-BMI1BMI2-NEXT: retq 279 %onebit = shl i32 1, %numlowbits 280 %mask = add nsw i32 %onebit, -1 281 %masked = and i32 %val, %mask ; swapped order 282 ret i32 %masked 283} 284 285; 64-bit 286 287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { 288; X86-NOBMI-LABEL: bzhi64_a0: 289; X86-NOBMI: # %bb.0: 290; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 291; X86-NOBMI-NEXT: movl $1, %eax 292; X86-NOBMI-NEXT: xorl %edx, %edx 293; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 294; X86-NOBMI-NEXT: shll %cl, %eax 295; X86-NOBMI-NEXT: testb $32, %cl 296; X86-NOBMI-NEXT: je .LBB5_2 297; X86-NOBMI-NEXT: # %bb.1: 298; X86-NOBMI-NEXT: movl %eax, %edx 299; X86-NOBMI-NEXT: xorl %eax, %eax 300; X86-NOBMI-NEXT: .LBB5_2: 301; X86-NOBMI-NEXT: addl $-1, %eax 302; X86-NOBMI-NEXT: adcl $-1, %edx 303; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 304; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 305; X86-NOBMI-NEXT: retl 306; 307; X86-BMI1NOTBM-LABEL: bzhi64_a0: 308; X86-BMI1NOTBM: # %bb.0: 309; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 310; X86-BMI1NOTBM-NEXT: movl $1, %eax 311; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 312; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx 313; X86-BMI1NOTBM-NEXT: shll %cl, %eax 314; 
X86-BMI1NOTBM-NEXT: testb $32, %cl 315; X86-BMI1NOTBM-NEXT: je .LBB5_2 316; X86-BMI1NOTBM-NEXT: # %bb.1: 317; X86-BMI1NOTBM-NEXT: movl %eax, %edx 318; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 319; X86-BMI1NOTBM-NEXT: .LBB5_2: 320; X86-BMI1NOTBM-NEXT: addl $-1, %eax 321; X86-BMI1NOTBM-NEXT: adcl $-1, %edx 322; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx 323; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 324; X86-BMI1NOTBM-NEXT: retl 325; 326; X86-BMI1BMI2-LABEL: bzhi64_a0: 327; X86-BMI1BMI2: # %bb.0: 328; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 329; X86-BMI1BMI2-NEXT: movl $1, %eax 330; X86-BMI1BMI2-NEXT: xorl %edx, %edx 331; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx 332; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 333; X86-BMI1BMI2-NEXT: testb $32, %cl 334; X86-BMI1BMI2-NEXT: je .LBB5_2 335; X86-BMI1BMI2-NEXT: # %bb.1: 336; X86-BMI1BMI2-NEXT: movl %eax, %edx 337; X86-BMI1BMI2-NEXT: xorl %eax, %eax 338; X86-BMI1BMI2-NEXT: .LBB5_2: 339; X86-BMI1BMI2-NEXT: addl $-1, %eax 340; X86-BMI1BMI2-NEXT: adcl $-1, %edx 341; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 342; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 343; X86-BMI1BMI2-NEXT: retl 344; 345; X64-NOBMI-LABEL: bzhi64_a0: 346; X64-NOBMI: # %bb.0: 347; X64-NOBMI-NEXT: movq %rsi, %rcx 348; X64-NOBMI-NEXT: movl $1, %eax 349; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 350; X64-NOBMI-NEXT: shlq %cl, %rax 351; X64-NOBMI-NEXT: decq %rax 352; X64-NOBMI-NEXT: andq %rdi, %rax 353; X64-NOBMI-NEXT: retq 354; 355; X64-BMI1NOTBM-LABEL: bzhi64_a0: 356; X64-BMI1NOTBM: # %bb.0: 357; X64-BMI1NOTBM-NEXT: shll $8, %esi 358; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 359; X64-BMI1NOTBM-NEXT: retq 360; 361; X64-BMI1BMI2-LABEL: bzhi64_a0: 362; X64-BMI1BMI2: # %bb.0: 363; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 364; X64-BMI1BMI2-NEXT: retq 365 %onebit = shl i64 1, %numlowbits 366 %mask = add nsw i64 %onebit, -1 367 %masked = and i64 %mask, %val 368 ret i64 %masked 369} 370 371define i64 @bzhi64_a1_indexzext(i64 %val, 
i8 zeroext %numlowbits) nounwind { 372; X86-NOBMI-LABEL: bzhi64_a1_indexzext: 373; X86-NOBMI: # %bb.0: 374; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 375; X86-NOBMI-NEXT: movl $1, %eax 376; X86-NOBMI-NEXT: xorl %edx, %edx 377; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 378; X86-NOBMI-NEXT: shll %cl, %eax 379; X86-NOBMI-NEXT: testb $32, %cl 380; X86-NOBMI-NEXT: je .LBB6_2 381; X86-NOBMI-NEXT: # %bb.1: 382; X86-NOBMI-NEXT: movl %eax, %edx 383; X86-NOBMI-NEXT: xorl %eax, %eax 384; X86-NOBMI-NEXT: .LBB6_2: 385; X86-NOBMI-NEXT: addl $-1, %eax 386; X86-NOBMI-NEXT: adcl $-1, %edx 387; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 388; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 389; X86-NOBMI-NEXT: retl 390; 391; X86-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: 392; X86-BMI1NOTBM: # %bb.0: 393; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 394; X86-BMI1NOTBM-NEXT: movl $1, %eax 395; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 396; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx 397; X86-BMI1NOTBM-NEXT: shll %cl, %eax 398; X86-BMI1NOTBM-NEXT: testb $32, %cl 399; X86-BMI1NOTBM-NEXT: je .LBB6_2 400; X86-BMI1NOTBM-NEXT: # %bb.1: 401; X86-BMI1NOTBM-NEXT: movl %eax, %edx 402; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 403; X86-BMI1NOTBM-NEXT: .LBB6_2: 404; X86-BMI1NOTBM-NEXT: addl $-1, %eax 405; X86-BMI1NOTBM-NEXT: adcl $-1, %edx 406; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx 407; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 408; X86-BMI1NOTBM-NEXT: retl 409; 410; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext: 411; X86-BMI1BMI2: # %bb.0: 412; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 413; X86-BMI1BMI2-NEXT: movl $1, %eax 414; X86-BMI1BMI2-NEXT: xorl %edx, %edx 415; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx 416; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 417; X86-BMI1BMI2-NEXT: testb $32, %cl 418; X86-BMI1BMI2-NEXT: je .LBB6_2 419; X86-BMI1BMI2-NEXT: # %bb.1: 420; X86-BMI1BMI2-NEXT: movl %eax, %edx 421; X86-BMI1BMI2-NEXT: xorl %eax, %eax 422; X86-BMI1BMI2-NEXT: .LBB6_2: 423; X86-BMI1BMI2-NEXT: addl $-1, 
%eax 424; X86-BMI1BMI2-NEXT: adcl $-1, %edx 425; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 426; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 427; X86-BMI1BMI2-NEXT: retl 428; 429; X64-NOBMI-LABEL: bzhi64_a1_indexzext: 430; X64-NOBMI: # %bb.0: 431; X64-NOBMI-NEXT: movl %esi, %ecx 432; X64-NOBMI-NEXT: movl $1, %eax 433; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 434; X64-NOBMI-NEXT: shlq %cl, %rax 435; X64-NOBMI-NEXT: decq %rax 436; X64-NOBMI-NEXT: andq %rdi, %rax 437; X64-NOBMI-NEXT: retq 438; 439; X64-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: 440; X64-BMI1NOTBM: # %bb.0: 441; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi 442; X64-BMI1NOTBM-NEXT: shll $8, %esi 443; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 444; X64-BMI1NOTBM-NEXT: retq 445; 446; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext: 447; X64-BMI1BMI2: # %bb.0: 448; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 449; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 450; X64-BMI1BMI2-NEXT: retq 451 %conv = zext i8 %numlowbits to i64 452 %onebit = shl i64 1, %conv 453 %mask = add nsw i64 %onebit, -1 454 %masked = and i64 %mask, %val 455 ret i64 %masked 456} 457 458define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { 459; X86-NOBMI-LABEL: bzhi64_a2_load: 460; X86-NOBMI: # %bb.0: 461; X86-NOBMI-NEXT: pushl %esi 462; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 463; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 464; X86-NOBMI-NEXT: movl $1, %eax 465; X86-NOBMI-NEXT: xorl %edx, %edx 466; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 467; X86-NOBMI-NEXT: shll %cl, %eax 468; X86-NOBMI-NEXT: testb $32, %cl 469; X86-NOBMI-NEXT: je .LBB7_2 470; X86-NOBMI-NEXT: # %bb.1: 471; X86-NOBMI-NEXT: movl %eax, %edx 472; X86-NOBMI-NEXT: xorl %eax, %eax 473; X86-NOBMI-NEXT: .LBB7_2: 474; X86-NOBMI-NEXT: addl $-1, %eax 475; X86-NOBMI-NEXT: adcl $-1, %edx 476; X86-NOBMI-NEXT: andl 4(%esi), %edx 477; X86-NOBMI-NEXT: andl (%esi), %eax 478; X86-NOBMI-NEXT: popl %esi 479; X86-NOBMI-NEXT: retl 480; 481; 
X86-BMI1NOTBM-LABEL: bzhi64_a2_load: 482; X86-BMI1NOTBM: # %bb.0: 483; X86-BMI1NOTBM-NEXT: pushl %esi 484; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi 485; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 486; X86-BMI1NOTBM-NEXT: movl $1, %eax 487; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 488; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx 489; X86-BMI1NOTBM-NEXT: shll %cl, %eax 490; X86-BMI1NOTBM-NEXT: testb $32, %cl 491; X86-BMI1NOTBM-NEXT: je .LBB7_2 492; X86-BMI1NOTBM-NEXT: # %bb.1: 493; X86-BMI1NOTBM-NEXT: movl %eax, %edx 494; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 495; X86-BMI1NOTBM-NEXT: .LBB7_2: 496; X86-BMI1NOTBM-NEXT: addl $-1, %eax 497; X86-BMI1NOTBM-NEXT: adcl $-1, %edx 498; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx 499; X86-BMI1NOTBM-NEXT: andl (%esi), %eax 500; X86-BMI1NOTBM-NEXT: popl %esi 501; X86-BMI1NOTBM-NEXT: retl 502; 503; X86-BMI1BMI2-LABEL: bzhi64_a2_load: 504; X86-BMI1BMI2: # %bb.0: 505; X86-BMI1BMI2-NEXT: pushl %esi 506; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 507; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 508; X86-BMI1BMI2-NEXT: movl $1, %eax 509; X86-BMI1BMI2-NEXT: xorl %edx, %edx 510; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx 511; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 512; X86-BMI1BMI2-NEXT: testb $32, %cl 513; X86-BMI1BMI2-NEXT: je .LBB7_2 514; X86-BMI1BMI2-NEXT: # %bb.1: 515; X86-BMI1BMI2-NEXT: movl %eax, %edx 516; X86-BMI1BMI2-NEXT: xorl %eax, %eax 517; X86-BMI1BMI2-NEXT: .LBB7_2: 518; X86-BMI1BMI2-NEXT: addl $-1, %eax 519; X86-BMI1BMI2-NEXT: adcl $-1, %edx 520; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx 521; X86-BMI1BMI2-NEXT: andl (%esi), %eax 522; X86-BMI1BMI2-NEXT: popl %esi 523; X86-BMI1BMI2-NEXT: retl 524; 525; X64-NOBMI-LABEL: bzhi64_a2_load: 526; X64-NOBMI: # %bb.0: 527; X64-NOBMI-NEXT: movq %rsi, %rcx 528; X64-NOBMI-NEXT: movl $1, %eax 529; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 530; X64-NOBMI-NEXT: shlq %cl, %rax 531; X64-NOBMI-NEXT: decq %rax 532; X64-NOBMI-NEXT: andq (%rdi), %rax 533; X64-NOBMI-NEXT: retq 534; 
535; X64-BMI1NOTBM-LABEL: bzhi64_a2_load: 536; X64-BMI1NOTBM: # %bb.0: 537; X64-BMI1NOTBM-NEXT: shll $8, %esi 538; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax 539; X64-BMI1NOTBM-NEXT: retq 540; 541; X64-BMI1BMI2-LABEL: bzhi64_a2_load: 542; X64-BMI1BMI2: # %bb.0: 543; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 544; X64-BMI1BMI2-NEXT: retq 545 %val = load i64, i64* %w 546 %onebit = shl i64 1, %numlowbits 547 %mask = add nsw i64 %onebit, -1 548 %masked = and i64 %mask, %val 549 ret i64 %masked 550} 551 552define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { 553; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext: 554; X86-NOBMI: # %bb.0: 555; X86-NOBMI-NEXT: pushl %esi 556; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 557; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 558; X86-NOBMI-NEXT: movl $1, %eax 559; X86-NOBMI-NEXT: xorl %edx, %edx 560; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 561; X86-NOBMI-NEXT: shll %cl, %eax 562; X86-NOBMI-NEXT: testb $32, %cl 563; X86-NOBMI-NEXT: je .LBB8_2 564; X86-NOBMI-NEXT: # %bb.1: 565; X86-NOBMI-NEXT: movl %eax, %edx 566; X86-NOBMI-NEXT: xorl %eax, %eax 567; X86-NOBMI-NEXT: .LBB8_2: 568; X86-NOBMI-NEXT: addl $-1, %eax 569; X86-NOBMI-NEXT: adcl $-1, %edx 570; X86-NOBMI-NEXT: andl 4(%esi), %edx 571; X86-NOBMI-NEXT: andl (%esi), %eax 572; X86-NOBMI-NEXT: popl %esi 573; X86-NOBMI-NEXT: retl 574; 575; X86-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: 576; X86-BMI1NOTBM: # %bb.0: 577; X86-BMI1NOTBM-NEXT: pushl %esi 578; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi 579; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 580; X86-BMI1NOTBM-NEXT: movl $1, %eax 581; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 582; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx 583; X86-BMI1NOTBM-NEXT: shll %cl, %eax 584; X86-BMI1NOTBM-NEXT: testb $32, %cl 585; X86-BMI1NOTBM-NEXT: je .LBB8_2 586; X86-BMI1NOTBM-NEXT: # %bb.1: 587; X86-BMI1NOTBM-NEXT: movl %eax, %edx 588; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 589; X86-BMI1NOTBM-NEXT: .LBB8_2: 590; X86-BMI1NOTBM-NEXT: 
addl $-1, %eax 591; X86-BMI1NOTBM-NEXT: adcl $-1, %edx 592; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx 593; X86-BMI1NOTBM-NEXT: andl (%esi), %eax 594; X86-BMI1NOTBM-NEXT: popl %esi 595; X86-BMI1NOTBM-NEXT: retl 596; 597; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: 598; X86-BMI1BMI2: # %bb.0: 599; X86-BMI1BMI2-NEXT: pushl %esi 600; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 601; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 602; X86-BMI1BMI2-NEXT: movl $1, %eax 603; X86-BMI1BMI2-NEXT: xorl %edx, %edx 604; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx 605; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 606; X86-BMI1BMI2-NEXT: testb $32, %cl 607; X86-BMI1BMI2-NEXT: je .LBB8_2 608; X86-BMI1BMI2-NEXT: # %bb.1: 609; X86-BMI1BMI2-NEXT: movl %eax, %edx 610; X86-BMI1BMI2-NEXT: xorl %eax, %eax 611; X86-BMI1BMI2-NEXT: .LBB8_2: 612; X86-BMI1BMI2-NEXT: addl $-1, %eax 613; X86-BMI1BMI2-NEXT: adcl $-1, %edx 614; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx 615; X86-BMI1BMI2-NEXT: andl (%esi), %eax 616; X86-BMI1BMI2-NEXT: popl %esi 617; X86-BMI1BMI2-NEXT: retl 618; 619; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: 620; X64-NOBMI: # %bb.0: 621; X64-NOBMI-NEXT: movl %esi, %ecx 622; X64-NOBMI-NEXT: movl $1, %eax 623; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 624; X64-NOBMI-NEXT: shlq %cl, %rax 625; X64-NOBMI-NEXT: decq %rax 626; X64-NOBMI-NEXT: andq (%rdi), %rax 627; X64-NOBMI-NEXT: retq 628; 629; X64-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: 630; X64-BMI1NOTBM: # %bb.0: 631; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi 632; X64-BMI1NOTBM-NEXT: shll $8, %esi 633; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax 634; X64-BMI1NOTBM-NEXT: retq 635; 636; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: 637; X64-BMI1BMI2: # %bb.0: 638; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 639; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 640; X64-BMI1BMI2-NEXT: retq 641 %val = load i64, i64* %w 642 %conv = zext i8 %numlowbits to i64 643 %onebit = shl i64 1, %conv 644 %mask = add nsw 
i64 %onebit, -1 645 %masked = and i64 %mask, %val 646 ret i64 %masked 647} 648 649define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { 650; X86-NOBMI-LABEL: bzhi64_a4_commutative: 651; X86-NOBMI: # %bb.0: 652; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 653; X86-NOBMI-NEXT: movl $1, %eax 654; X86-NOBMI-NEXT: xorl %edx, %edx 655; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 656; X86-NOBMI-NEXT: shll %cl, %eax 657; X86-NOBMI-NEXT: testb $32, %cl 658; X86-NOBMI-NEXT: je .LBB9_2 659; X86-NOBMI-NEXT: # %bb.1: 660; X86-NOBMI-NEXT: movl %eax, %edx 661; X86-NOBMI-NEXT: xorl %eax, %eax 662; X86-NOBMI-NEXT: .LBB9_2: 663; X86-NOBMI-NEXT: addl $-1, %eax 664; X86-NOBMI-NEXT: adcl $-1, %edx 665; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 666; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 667; X86-NOBMI-NEXT: retl 668; 669; X86-BMI1NOTBM-LABEL: bzhi64_a4_commutative: 670; X86-BMI1NOTBM: # %bb.0: 671; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 672; X86-BMI1NOTBM-NEXT: movl $1, %eax 673; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 674; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx 675; X86-BMI1NOTBM-NEXT: shll %cl, %eax 676; X86-BMI1NOTBM-NEXT: testb $32, %cl 677; X86-BMI1NOTBM-NEXT: je .LBB9_2 678; X86-BMI1NOTBM-NEXT: # %bb.1: 679; X86-BMI1NOTBM-NEXT: movl %eax, %edx 680; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 681; X86-BMI1NOTBM-NEXT: .LBB9_2: 682; X86-BMI1NOTBM-NEXT: addl $-1, %eax 683; X86-BMI1NOTBM-NEXT: adcl $-1, %edx 684; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx 685; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 686; X86-BMI1NOTBM-NEXT: retl 687; 688; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative: 689; X86-BMI1BMI2: # %bb.0: 690; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 691; X86-BMI1BMI2-NEXT: movl $1, %eax 692; X86-BMI1BMI2-NEXT: xorl %edx, %edx 693; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx 694; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 695; X86-BMI1BMI2-NEXT: testb $32, %cl 696; X86-BMI1BMI2-NEXT: je .LBB9_2 697; X86-BMI1BMI2-NEXT: # %bb.1: 698; X86-BMI1BMI2-NEXT: 
movl %eax, %edx 699; X86-BMI1BMI2-NEXT: xorl %eax, %eax 700; X86-BMI1BMI2-NEXT: .LBB9_2: 701; X86-BMI1BMI2-NEXT: addl $-1, %eax 702; X86-BMI1BMI2-NEXT: adcl $-1, %edx 703; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 704; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 705; X86-BMI1BMI2-NEXT: retl 706; 707; X64-NOBMI-LABEL: bzhi64_a4_commutative: 708; X64-NOBMI: # %bb.0: 709; X64-NOBMI-NEXT: movq %rsi, %rcx 710; X64-NOBMI-NEXT: movl $1, %eax 711; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 712; X64-NOBMI-NEXT: shlq %cl, %rax 713; X64-NOBMI-NEXT: decq %rax 714; X64-NOBMI-NEXT: andq %rdi, %rax 715; X64-NOBMI-NEXT: retq 716; 717; X64-BMI1NOTBM-LABEL: bzhi64_a4_commutative: 718; X64-BMI1NOTBM: # %bb.0: 719; X64-BMI1NOTBM-NEXT: shll $8, %esi 720; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 721; X64-BMI1NOTBM-NEXT: retq 722; 723; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative: 724; X64-BMI1BMI2: # %bb.0: 725; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 726; X64-BMI1BMI2-NEXT: retq 727 %onebit = shl i64 1, %numlowbits 728 %mask = add nsw i64 %onebit, -1 729 %masked = and i64 %val, %mask ; swapped order 730 ret i64 %masked 731} 732 733; 64-bit, but with 32-bit output 734 735; Everything done in 64-bit, truncation happens last. 
736define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { 737; X86-NOBMI-LABEL: bzhi64_32_a0: 738; X86-NOBMI: # %bb.0: 739; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 740; X86-NOBMI-NEXT: movl $1, %edx 741; X86-NOBMI-NEXT: shll %cl, %edx 742; X86-NOBMI-NEXT: xorl %eax, %eax 743; X86-NOBMI-NEXT: testb $32, %cl 744; X86-NOBMI-NEXT: jne .LBB10_2 745; X86-NOBMI-NEXT: # %bb.1: 746; X86-NOBMI-NEXT: movl %edx, %eax 747; X86-NOBMI-NEXT: .LBB10_2: 748; X86-NOBMI-NEXT: decl %eax 749; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 750; X86-NOBMI-NEXT: retl 751; 752; X86-BMI1NOTBM-LABEL: bzhi64_32_a0: 753; X86-BMI1NOTBM: # %bb.0: 754; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 755; X86-BMI1NOTBM-NEXT: movl $1, %edx 756; X86-BMI1NOTBM-NEXT: shll %cl, %edx 757; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 758; X86-BMI1NOTBM-NEXT: testb $32, %cl 759; X86-BMI1NOTBM-NEXT: jne .LBB10_2 760; X86-BMI1NOTBM-NEXT: # %bb.1: 761; X86-BMI1NOTBM-NEXT: movl %edx, %eax 762; X86-BMI1NOTBM-NEXT: .LBB10_2: 763; X86-BMI1NOTBM-NEXT: decl %eax 764; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 765; X86-BMI1NOTBM-NEXT: retl 766; 767; X86-BMI1BMI2-LABEL: bzhi64_32_a0: 768; X86-BMI1BMI2: # %bb.0: 769; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 770; X86-BMI1BMI2-NEXT: xorl %eax, %eax 771; X86-BMI1BMI2-NEXT: testb $32, %cl 772; X86-BMI1BMI2-NEXT: jne .LBB10_2 773; X86-BMI1BMI2-NEXT: # %bb.1: 774; X86-BMI1BMI2-NEXT: movl $1, %eax 775; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 776; X86-BMI1BMI2-NEXT: .LBB10_2: 777; X86-BMI1BMI2-NEXT: decl %eax 778; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 779; X86-BMI1BMI2-NEXT: retl 780; 781; X64-NOBMI-LABEL: bzhi64_32_a0: 782; X64-NOBMI: # %bb.0: 783; X64-NOBMI-NEXT: movq %rsi, %rcx 784; X64-NOBMI-NEXT: movl $1, %eax 785; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 786; X64-NOBMI-NEXT: shlq %cl, %rax 787; X64-NOBMI-NEXT: decl %eax 788; X64-NOBMI-NEXT: andl %edi, %eax 789; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 790; X64-NOBMI-NEXT: 
retq 791; 792; X64-BMI1NOTBM-LABEL: bzhi64_32_a0: 793; X64-BMI1NOTBM: # %bb.0: 794; X64-BMI1NOTBM-NEXT: shll $8, %esi 795; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 796; X64-BMI1NOTBM-NEXT: retq 797; 798; X64-BMI1BMI2-LABEL: bzhi64_32_a0: 799; X64-BMI1BMI2: # %bb.0: 800; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 801; X64-BMI1BMI2-NEXT: retq 802 %onebit = shl i64 1, %numlowbits 803 %mask = add nsw i64 %onebit, -1 804 %masked = and i64 %mask, %val 805 %res = trunc i64 %masked to i32 806 ret i32 %res 807} 808 809; The 64-bit value is truncated to 32-bit first; the shift and the masking are both 32-bit. 810define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { 811; X86-NOBMI-LABEL: bzhi64_32_a1: 812; X86-NOBMI: # %bb.0: 813; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 814; X86-NOBMI-NEXT: movl $1, %eax 815; X86-NOBMI-NEXT: shll %cl, %eax 816; X86-NOBMI-NEXT: decl %eax 817; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 818; X86-NOBMI-NEXT: retl 819; 820; X86-BMI1NOTBM-LABEL: bzhi64_32_a1: 821; X86-BMI1NOTBM: # %bb.0: 822; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 823; X86-BMI1NOTBM-NEXT: shll $8, %eax 824; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 825; X86-BMI1NOTBM-NEXT: retl 826; 827; X86-BMI1BMI2-LABEL: bzhi64_32_a1: 828; X86-BMI1BMI2: # %bb.0: 829; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 830; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 831; X86-BMI1BMI2-NEXT: retl 832; 833; X64-NOBMI-LABEL: bzhi64_32_a1: 834; X64-NOBMI: # %bb.0: 835; X64-NOBMI-NEXT: movl %esi, %ecx 836; X64-NOBMI-NEXT: movl $1, %eax 837; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 838; X64-NOBMI-NEXT: shll %cl, %eax 839; X64-NOBMI-NEXT: decl %eax 840; X64-NOBMI-NEXT: andl %edi, %eax 841; X64-NOBMI-NEXT: retq 842; 843; X64-BMI1NOTBM-LABEL: bzhi64_32_a1: 844; X64-BMI1NOTBM: # %bb.0: 845; X64-BMI1NOTBM-NEXT: shll $8, %esi 846; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 847; X64-BMI1NOTBM-NEXT: retq 848; 849; X64-BMI1BMI2-LABEL: bzhi64_32_a1: 850; X64-BMI1BMI2: # %bb.0: 851; 
X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 852; X64-BMI1BMI2-NEXT: retq 853 %truncval = trunc i64 %val to i32 854 %onebit = shl i32 1, %numlowbits 855 %mask = add nsw i32 %onebit, -1 856 %masked = and i32 %mask, %truncval 857 ret i32 %masked 858} 859 860; Shifting happens in 64-bit, then truncation (with extra use). 861; Masking is 32-bit. 862define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind { 863; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: 864; X86-NOBMI: # %bb.0: 865; X86-NOBMI-NEXT: pushl %ebx 866; X86-NOBMI-NEXT: pushl %esi 867; X86-NOBMI-NEXT: pushl %eax 868; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %bl 869; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 870; X86-NOBMI-NEXT: movl %esi, (%esp) 871; X86-NOBMI-NEXT: calll use32 872; X86-NOBMI-NEXT: movl $1, %eax 873; X86-NOBMI-NEXT: movl %ebx, %ecx 874; X86-NOBMI-NEXT: shll %cl, %eax 875; X86-NOBMI-NEXT: decl %eax 876; X86-NOBMI-NEXT: andl %esi, %eax 877; X86-NOBMI-NEXT: addl $4, %esp 878; X86-NOBMI-NEXT: popl %esi 879; X86-NOBMI-NEXT: popl %ebx 880; X86-NOBMI-NEXT: retl 881; 882; X86-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: 883; X86-BMI1NOTBM: # %bb.0: 884; X86-BMI1NOTBM-NEXT: pushl %ebx 885; X86-BMI1NOTBM-NEXT: pushl %esi 886; X86-BMI1NOTBM-NEXT: pushl %eax 887; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %bl 888; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi 889; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) 890; X86-BMI1NOTBM-NEXT: calll use32 891; X86-BMI1NOTBM-NEXT: shll $8, %ebx 892; X86-BMI1NOTBM-NEXT: bextrl %ebx, %esi, %eax 893; X86-BMI1NOTBM-NEXT: addl $4, %esp 894; X86-BMI1NOTBM-NEXT: popl %esi 895; X86-BMI1NOTBM-NEXT: popl %ebx 896; X86-BMI1NOTBM-NEXT: retl 897; 898; X86-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: 899; X86-BMI1BMI2: # %bb.0: 900; X86-BMI1BMI2-NEXT: pushl %ebx 901; X86-BMI1BMI2-NEXT: pushl %esi 902; X86-BMI1BMI2-NEXT: pushl %eax 903; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 904; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 905; X86-BMI1BMI2-NEXT: movl 
%esi, (%esp) 906; X86-BMI1BMI2-NEXT: calll use32 907; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax 908; X86-BMI1BMI2-NEXT: addl $4, %esp 909; X86-BMI1BMI2-NEXT: popl %esi 910; X86-BMI1BMI2-NEXT: popl %ebx 911; X86-BMI1BMI2-NEXT: retl 912; 913; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: 914; X64-NOBMI: # %bb.0: 915; X64-NOBMI-NEXT: pushq %rbp 916; X64-NOBMI-NEXT: pushq %rbx 917; X64-NOBMI-NEXT: pushq %rax 918; X64-NOBMI-NEXT: movl %esi, %ebp 919; X64-NOBMI-NEXT: movq %rdi, %rbx 920; X64-NOBMI-NEXT: callq use32 921; X64-NOBMI-NEXT: movl $1, %eax 922; X64-NOBMI-NEXT: movl %ebp, %ecx 923; X64-NOBMI-NEXT: shll %cl, %eax 924; X64-NOBMI-NEXT: decl %eax 925; X64-NOBMI-NEXT: andl %ebx, %eax 926; X64-NOBMI-NEXT: addq $8, %rsp 927; X64-NOBMI-NEXT: popq %rbx 928; X64-NOBMI-NEXT: popq %rbp 929; X64-NOBMI-NEXT: retq 930; 931; X64-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: 932; X64-BMI1NOTBM: # %bb.0: 933; X64-BMI1NOTBM-NEXT: pushq %r14 934; X64-BMI1NOTBM-NEXT: pushq %rbx 935; X64-BMI1NOTBM-NEXT: pushq %rax 936; X64-BMI1NOTBM-NEXT: movl %esi, %ebx 937; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 938; X64-BMI1NOTBM-NEXT: callq use32 939; X64-BMI1NOTBM-NEXT: shll $8, %ebx 940; X64-BMI1NOTBM-NEXT: bextrl %ebx, %r14d, %eax 941; X64-BMI1NOTBM-NEXT: addq $8, %rsp 942; X64-BMI1NOTBM-NEXT: popq %rbx 943; X64-BMI1NOTBM-NEXT: popq %r14 944; X64-BMI1NOTBM-NEXT: retq 945; 946; X64-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: 947; X64-BMI1BMI2: # %bb.0: 948; X64-BMI1BMI2-NEXT: pushq %rbp 949; X64-BMI1BMI2-NEXT: pushq %rbx 950; X64-BMI1BMI2-NEXT: pushq %rax 951; X64-BMI1BMI2-NEXT: movl %esi, %ebp 952; X64-BMI1BMI2-NEXT: movq %rdi, %rbx 953; X64-BMI1BMI2-NEXT: callq use32 954; X64-BMI1BMI2-NEXT: bzhil %ebp, %ebx, %eax 955; X64-BMI1BMI2-NEXT: addq $8, %rsp 956; X64-BMI1BMI2-NEXT: popq %rbx 957; X64-BMI1BMI2-NEXT: popq %rbp 958; X64-BMI1BMI2-NEXT: retq 959 %truncval = trunc i64 %val to i32 960 call void @use32(i32 %truncval) 961 %onebit = shl i32 1, %numlowbits 962 %mask = add nsw i32 %onebit, -1 963 
%masked = and i32 %mask, %truncval 964 ret i32 %masked 965} 966 967; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit. 968; Masking is 64-bit. Then truncation. 969define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { 970; X86-NOBMI-LABEL: bzhi64_32_a2: 971; X86-NOBMI: # %bb.0: 972; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 973; X86-NOBMI-NEXT: movl $1, %eax 974; X86-NOBMI-NEXT: shll %cl, %eax 975; X86-NOBMI-NEXT: decl %eax 976; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 977; X86-NOBMI-NEXT: retl 978; 979; X86-BMI1NOTBM-LABEL: bzhi64_32_a2: 980; X86-BMI1NOTBM: # %bb.0: 981; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 982; X86-BMI1NOTBM-NEXT: shll $8, %eax 983; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 984; X86-BMI1NOTBM-NEXT: retl 985; 986; X86-BMI1BMI2-LABEL: bzhi64_32_a2: 987; X86-BMI1BMI2: # %bb.0: 988; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 989; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 990; X86-BMI1BMI2-NEXT: retl 991; 992; X64-NOBMI-LABEL: bzhi64_32_a2: 993; X64-NOBMI: # %bb.0: 994; X64-NOBMI-NEXT: movl %esi, %ecx 995; X64-NOBMI-NEXT: movl $1, %eax 996; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 997; X64-NOBMI-NEXT: shll %cl, %eax 998; X64-NOBMI-NEXT: decl %eax 999; X64-NOBMI-NEXT: andl %edi, %eax 1000; X64-NOBMI-NEXT: retq 1001; 1002; X64-BMI1NOTBM-LABEL: bzhi64_32_a2: 1003; X64-BMI1NOTBM: # %bb.0: 1004; X64-BMI1NOTBM-NEXT: shll $8, %esi 1005; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1006; X64-BMI1NOTBM-NEXT: retq 1007; 1008; X64-BMI1BMI2-LABEL: bzhi64_32_a2: 1009; X64-BMI1BMI2: # %bb.0: 1010; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1011; X64-BMI1BMI2-NEXT: retq 1012 %onebit = shl i32 1, %numlowbits 1013 %mask = add nsw i32 %onebit, -1 1014 %zextmask = zext i32 %mask to i64 1015 %masked = and i64 %zextmask, %val 1016 %truncmasked = trunc i64 %masked to i32 1017 ret i32 %truncmasked 1018} 1019 1020; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit. 
1021; Masking is 64-bit. Then truncation. 1022define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { 1023; X86-NOBMI-LABEL: bzhi64_32_a3: 1024; X86-NOBMI: # %bb.0: 1025; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1026; X86-NOBMI-NEXT: movl $1, %edx 1027; X86-NOBMI-NEXT: shll %cl, %edx 1028; X86-NOBMI-NEXT: xorl %eax, %eax 1029; X86-NOBMI-NEXT: testb $32, %cl 1030; X86-NOBMI-NEXT: jne .LBB14_2 1031; X86-NOBMI-NEXT: # %bb.1: 1032; X86-NOBMI-NEXT: movl %edx, %eax 1033; X86-NOBMI-NEXT: .LBB14_2: 1034; X86-NOBMI-NEXT: decl %eax 1035; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1036; X86-NOBMI-NEXT: retl 1037; 1038; X86-BMI1NOTBM-LABEL: bzhi64_32_a3: 1039; X86-BMI1NOTBM: # %bb.0: 1040; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1041; X86-BMI1NOTBM-NEXT: movl $1, %edx 1042; X86-BMI1NOTBM-NEXT: shll %cl, %edx 1043; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1044; X86-BMI1NOTBM-NEXT: testb $32, %cl 1045; X86-BMI1NOTBM-NEXT: jne .LBB14_2 1046; X86-BMI1NOTBM-NEXT: # %bb.1: 1047; X86-BMI1NOTBM-NEXT: movl %edx, %eax 1048; X86-BMI1NOTBM-NEXT: .LBB14_2: 1049; X86-BMI1NOTBM-NEXT: decl %eax 1050; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 1051; X86-BMI1NOTBM-NEXT: retl 1052; 1053; X86-BMI1BMI2-LABEL: bzhi64_32_a3: 1054; X86-BMI1BMI2: # %bb.0: 1055; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1056; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1057; X86-BMI1BMI2-NEXT: testb $32, %cl 1058; X86-BMI1BMI2-NEXT: jne .LBB14_2 1059; X86-BMI1BMI2-NEXT: # %bb.1: 1060; X86-BMI1BMI2-NEXT: movl $1, %eax 1061; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax 1062; X86-BMI1BMI2-NEXT: .LBB14_2: 1063; X86-BMI1BMI2-NEXT: decl %eax 1064; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1065; X86-BMI1BMI2-NEXT: retl 1066; 1067; X64-NOBMI-LABEL: bzhi64_32_a3: 1068; X64-NOBMI: # %bb.0: 1069; X64-NOBMI-NEXT: movq %rsi, %rcx 1070; X64-NOBMI-NEXT: movl $1, %eax 1071; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1072; X64-NOBMI-NEXT: shlq %cl, %rax 1073; X64-NOBMI-NEXT: decl %eax 1074; X64-NOBMI-NEXT: 
andl %edi, %eax 1075; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 1076; X64-NOBMI-NEXT: retq 1077; 1078; X64-BMI1NOTBM-LABEL: bzhi64_32_a3: 1079; X64-BMI1NOTBM: # %bb.0: 1080; X64-BMI1NOTBM-NEXT: shll $8, %esi 1081; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1082; X64-BMI1NOTBM-NEXT: retq 1083; 1084; X64-BMI1BMI2-LABEL: bzhi64_32_a3: 1085; X64-BMI1BMI2: # %bb.0: 1086; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1087; X64-BMI1BMI2-NEXT: retq 1088 %onebit = shl i64 1, %numlowbits 1089 %mask = add nsw i64 %onebit, 4294967295 1090 %masked = and i64 %mask, %val 1091 %truncmasked = trunc i64 %masked to i32 1092 ret i32 %truncmasked 1093} 1094 1095; ---------------------------------------------------------------------------- ; 1096; Pattern b. 32-bit 1097; ---------------------------------------------------------------------------- ; 1098 1099define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { 1100; X86-NOBMI-LABEL: bzhi32_b0: 1101; X86-NOBMI: # %bb.0: 1102; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1103; X86-NOBMI-NEXT: movl $-1, %eax 1104; X86-NOBMI-NEXT: shll %cl, %eax 1105; X86-NOBMI-NEXT: notl %eax 1106; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1107; X86-NOBMI-NEXT: retl 1108; 1109; X86-BMI1NOTBM-LABEL: bzhi32_b0: 1110; X86-BMI1NOTBM: # %bb.0: 1111; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 1112; X86-BMI1NOTBM-NEXT: shll $8, %eax 1113; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1114; X86-BMI1NOTBM-NEXT: retl 1115; 1116; X86-BMI1BMI2-LABEL: bzhi32_b0: 1117; X86-BMI1BMI2: # %bb.0: 1118; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1119; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1120; X86-BMI1BMI2-NEXT: retl 1121; 1122; X64-NOBMI-LABEL: bzhi32_b0: 1123; X64-NOBMI: # %bb.0: 1124; X64-NOBMI-NEXT: movl %esi, %ecx 1125; X64-NOBMI-NEXT: movl $-1, %eax 1126; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1127; X64-NOBMI-NEXT: shll %cl, %eax 1128; X64-NOBMI-NEXT: notl %eax 1129; X64-NOBMI-NEXT: andl %edi, %eax 
1130; X64-NOBMI-NEXT: retq 1131; 1132; X64-BMI1NOTBM-LABEL: bzhi32_b0: 1133; X64-BMI1NOTBM: # %bb.0: 1134; X64-BMI1NOTBM-NEXT: shll $8, %esi 1135; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1136; X64-BMI1NOTBM-NEXT: retq 1137; 1138; X64-BMI1BMI2-LABEL: bzhi32_b0: 1139; X64-BMI1BMI2: # %bb.0: 1140; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1141; X64-BMI1BMI2-NEXT: retq 1142 %notmask = shl i32 -1, %numlowbits 1143 %mask = xor i32 %notmask, -1 1144 %masked = and i32 %mask, %val 1145 ret i32 %masked 1146} 1147 1148define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { 1149; X86-NOBMI-LABEL: bzhi32_b1_indexzext: 1150; X86-NOBMI: # %bb.0: 1151; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1152; X86-NOBMI-NEXT: movl $-1, %eax 1153; X86-NOBMI-NEXT: shll %cl, %eax 1154; X86-NOBMI-NEXT: notl %eax 1155; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1156; X86-NOBMI-NEXT: retl 1157; 1158; X86-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: 1159; X86-BMI1NOTBM: # %bb.0: 1160; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 1161; X86-BMI1NOTBM-NEXT: shll $8, %eax 1162; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1163; X86-BMI1NOTBM-NEXT: retl 1164; 1165; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext: 1166; X86-BMI1BMI2: # %bb.0: 1167; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1168; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1169; X86-BMI1BMI2-NEXT: retl 1170; 1171; X64-NOBMI-LABEL: bzhi32_b1_indexzext: 1172; X64-NOBMI: # %bb.0: 1173; X64-NOBMI-NEXT: movl %esi, %ecx 1174; X64-NOBMI-NEXT: movl $-1, %eax 1175; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1176; X64-NOBMI-NEXT: shll %cl, %eax 1177; X64-NOBMI-NEXT: notl %eax 1178; X64-NOBMI-NEXT: andl %edi, %eax 1179; X64-NOBMI-NEXT: retq 1180; 1181; X64-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: 1182; X64-BMI1NOTBM: # %bb.0: 1183; X64-BMI1NOTBM-NEXT: shll $8, %esi 1184; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1185; X64-BMI1NOTBM-NEXT: retq 1186; 1187; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext: 1188; 
X64-BMI1BMI2: # %bb.0: 1189; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1190; X64-BMI1BMI2-NEXT: retq 1191 %conv = zext i8 %numlowbits to i32 1192 %notmask = shl i32 -1, %conv 1193 %mask = xor i32 %notmask, -1 1194 %masked = and i32 %mask, %val 1195 ret i32 %masked 1196} 1197 1198define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { 1199; X86-NOBMI-LABEL: bzhi32_b2_load: 1200; X86-NOBMI: # %bb.0: 1201; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1202; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1203; X86-NOBMI-NEXT: movl $-1, %eax 1204; X86-NOBMI-NEXT: shll %cl, %eax 1205; X86-NOBMI-NEXT: notl %eax 1206; X86-NOBMI-NEXT: andl (%edx), %eax 1207; X86-NOBMI-NEXT: retl 1208; 1209; X86-BMI1NOTBM-LABEL: bzhi32_b2_load: 1210; X86-BMI1NOTBM: # %bb.0: 1211; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 1212; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1213; X86-BMI1NOTBM-NEXT: shll $8, %ecx 1214; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax 1215; X86-BMI1NOTBM-NEXT: retl 1216; 1217; X86-BMI1BMI2-LABEL: bzhi32_b2_load: 1218; X86-BMI1BMI2: # %bb.0: 1219; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1220; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1221; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 1222; X86-BMI1BMI2-NEXT: retl 1223; 1224; X64-NOBMI-LABEL: bzhi32_b2_load: 1225; X64-NOBMI: # %bb.0: 1226; X64-NOBMI-NEXT: movl %esi, %ecx 1227; X64-NOBMI-NEXT: movl $-1, %eax 1228; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1229; X64-NOBMI-NEXT: shll %cl, %eax 1230; X64-NOBMI-NEXT: notl %eax 1231; X64-NOBMI-NEXT: andl (%rdi), %eax 1232; X64-NOBMI-NEXT: retq 1233; 1234; X64-BMI1NOTBM-LABEL: bzhi32_b2_load: 1235; X64-BMI1NOTBM: # %bb.0: 1236; X64-BMI1NOTBM-NEXT: shll $8, %esi 1237; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax 1238; X64-BMI1NOTBM-NEXT: retq 1239; 1240; X64-BMI1BMI2-LABEL: bzhi32_b2_load: 1241; X64-BMI1BMI2: # %bb.0: 1242; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1243; X64-BMI1BMI2-NEXT: retq 1244 %val = load i32, i32* %w 1245 %notmask = 
shl i32 -1, %numlowbits 1246 %mask = xor i32 %notmask, -1 1247 %masked = and i32 %mask, %val 1248 ret i32 %masked 1249} 1250 1251define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { 1252; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext: 1253; X86-NOBMI: # %bb.0: 1254; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1255; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1256; X86-NOBMI-NEXT: movl $-1, %eax 1257; X86-NOBMI-NEXT: shll %cl, %eax 1258; X86-NOBMI-NEXT: notl %eax 1259; X86-NOBMI-NEXT: andl (%edx), %eax 1260; X86-NOBMI-NEXT: retl 1261; 1262; X86-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: 1263; X86-BMI1NOTBM: # %bb.0: 1264; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 1265; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1266; X86-BMI1NOTBM-NEXT: shll $8, %ecx 1267; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax 1268; X86-BMI1NOTBM-NEXT: retl 1269; 1270; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: 1271; X86-BMI1BMI2: # %bb.0: 1272; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1273; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1274; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 1275; X86-BMI1BMI2-NEXT: retl 1276; 1277; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext: 1278; X64-NOBMI: # %bb.0: 1279; X64-NOBMI-NEXT: movl %esi, %ecx 1280; X64-NOBMI-NEXT: movl $-1, %eax 1281; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1282; X64-NOBMI-NEXT: shll %cl, %eax 1283; X64-NOBMI-NEXT: notl %eax 1284; X64-NOBMI-NEXT: andl (%rdi), %eax 1285; X64-NOBMI-NEXT: retq 1286; 1287; X64-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: 1288; X64-BMI1NOTBM: # %bb.0: 1289; X64-BMI1NOTBM-NEXT: shll $8, %esi 1290; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax 1291; X64-BMI1NOTBM-NEXT: retq 1292; 1293; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: 1294; X64-BMI1BMI2: # %bb.0: 1295; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1296; X64-BMI1BMI2-NEXT: retq 1297 %val = load i32, i32* %w 1298 %conv = zext i8 %numlowbits to i32 1299 %notmask = shl i32 -1, %conv 1300 %mask = 
xor i32 %notmask, -1 1301 %masked = and i32 %mask, %val 1302 ret i32 %masked 1303} 1304 1305define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { 1306; X86-NOBMI-LABEL: bzhi32_b4_commutative: 1307; X86-NOBMI: # %bb.0: 1308; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1309; X86-NOBMI-NEXT: movl $-1, %eax 1310; X86-NOBMI-NEXT: shll %cl, %eax 1311; X86-NOBMI-NEXT: notl %eax 1312; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1313; X86-NOBMI-NEXT: retl 1314; 1315; X86-BMI1NOTBM-LABEL: bzhi32_b4_commutative: 1316; X86-BMI1NOTBM: # %bb.0: 1317; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 1318; X86-BMI1NOTBM-NEXT: shll $8, %eax 1319; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1320; X86-BMI1NOTBM-NEXT: retl 1321; 1322; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative: 1323; X86-BMI1BMI2: # %bb.0: 1324; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1325; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1326; X86-BMI1BMI2-NEXT: retl 1327; 1328; X64-NOBMI-LABEL: bzhi32_b4_commutative: 1329; X64-NOBMI: # %bb.0: 1330; X64-NOBMI-NEXT: movl %esi, %ecx 1331; X64-NOBMI-NEXT: movl $-1, %eax 1332; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1333; X64-NOBMI-NEXT: shll %cl, %eax 1334; X64-NOBMI-NEXT: notl %eax 1335; X64-NOBMI-NEXT: andl %edi, %eax 1336; X64-NOBMI-NEXT: retq 1337; 1338; X64-BMI1NOTBM-LABEL: bzhi32_b4_commutative: 1339; X64-BMI1NOTBM: # %bb.0: 1340; X64-BMI1NOTBM-NEXT: shll $8, %esi 1341; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1342; X64-BMI1NOTBM-NEXT: retq 1343; 1344; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative: 1345; X64-BMI1BMI2: # %bb.0: 1346; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1347; X64-BMI1BMI2-NEXT: retq 1348 %notmask = shl i32 -1, %numlowbits 1349 %mask = xor i32 %notmask, -1 1350 %masked = and i32 %val, %mask ; swapped order 1351 ret i32 %masked 1352} 1353 1354; 64-bit 1355 1356define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { 1357; X86-NOBMI-LABEL: bzhi64_b0: 1358; X86-NOBMI: # %bb.0: 1359; 
X86-NOBMI-NEXT: pushl %esi 1360; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1361; X86-NOBMI-NEXT: movl $-1, %edx 1362; X86-NOBMI-NEXT: movl $-1, %esi 1363; X86-NOBMI-NEXT: shll %cl, %esi 1364; X86-NOBMI-NEXT: xorl %eax, %eax 1365; X86-NOBMI-NEXT: testb $32, %cl 1366; X86-NOBMI-NEXT: jne .LBB20_1 1367; X86-NOBMI-NEXT: # %bb.2: 1368; X86-NOBMI-NEXT: movl %esi, %eax 1369; X86-NOBMI-NEXT: jmp .LBB20_3 1370; X86-NOBMI-NEXT: .LBB20_1: 1371; X86-NOBMI-NEXT: movl %esi, %edx 1372; X86-NOBMI-NEXT: .LBB20_3: 1373; X86-NOBMI-NEXT: notl %edx 1374; X86-NOBMI-NEXT: notl %eax 1375; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1376; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1377; X86-NOBMI-NEXT: popl %esi 1378; X86-NOBMI-NEXT: retl 1379; 1380; X86-BMI1NOTBM-LABEL: bzhi64_b0: 1381; X86-BMI1NOTBM: # %bb.0: 1382; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1383; X86-BMI1NOTBM-NEXT: movl $-1, %edx 1384; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1385; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1386; X86-BMI1NOTBM-NEXT: testb $32, %cl 1387; X86-BMI1NOTBM-NEXT: je .LBB20_2 1388; X86-BMI1NOTBM-NEXT: # %bb.1: 1389; X86-BMI1NOTBM-NEXT: movl %eax, %edx 1390; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1391; X86-BMI1NOTBM-NEXT: .LBB20_2: 1392; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1393; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1394; X86-BMI1NOTBM-NEXT: retl 1395; 1396; X86-BMI1BMI2-LABEL: bzhi64_b0: 1397; X86-BMI1BMI2: # %bb.0: 1398; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl 1399; X86-BMI1BMI2-NEXT: movl $-1, %ecx 1400; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax 1401; X86-BMI1BMI2-NEXT: testb $32, %dl 1402; X86-BMI1BMI2-NEXT: je .LBB20_2 1403; X86-BMI1BMI2-NEXT: # %bb.1: 1404; X86-BMI1BMI2-NEXT: movl %eax, %ecx 1405; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1406; X86-BMI1BMI2-NEXT: .LBB20_2: 1407; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1408; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1409; X86-BMI1BMI2-NEXT: retl 1410; 1411; X64-NOBMI-LABEL: bzhi64_b0: 1412; 
X64-NOBMI: # %bb.0: 1413; X64-NOBMI-NEXT: movq %rsi, %rcx 1414; X64-NOBMI-NEXT: movq $-1, %rax 1415; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1416; X64-NOBMI-NEXT: shlq %cl, %rax 1417; X64-NOBMI-NEXT: notq %rax 1418; X64-NOBMI-NEXT: andq %rdi, %rax 1419; X64-NOBMI-NEXT: retq 1420; 1421; X64-BMI1NOTBM-LABEL: bzhi64_b0: 1422; X64-BMI1NOTBM: # %bb.0: 1423; X64-BMI1NOTBM-NEXT: shll $8, %esi 1424; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 1425; X64-BMI1NOTBM-NEXT: retq 1426; 1427; X64-BMI1BMI2-LABEL: bzhi64_b0: 1428; X64-BMI1BMI2: # %bb.0: 1429; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1430; X64-BMI1BMI2-NEXT: retq 1431 %notmask = shl i64 -1, %numlowbits 1432 %mask = xor i64 %notmask, -1 1433 %masked = and i64 %mask, %val 1434 ret i64 %masked 1435} 1436 1437define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { 1438; X86-NOBMI-LABEL: bzhi64_b1_indexzext: 1439; X86-NOBMI: # %bb.0: 1440; X86-NOBMI-NEXT: pushl %esi 1441; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1442; X86-NOBMI-NEXT: movl $-1, %edx 1443; X86-NOBMI-NEXT: movl $-1, %esi 1444; X86-NOBMI-NEXT: shll %cl, %esi 1445; X86-NOBMI-NEXT: xorl %eax, %eax 1446; X86-NOBMI-NEXT: testb $32, %cl 1447; X86-NOBMI-NEXT: jne .LBB21_1 1448; X86-NOBMI-NEXT: # %bb.2: 1449; X86-NOBMI-NEXT: movl %esi, %eax 1450; X86-NOBMI-NEXT: jmp .LBB21_3 1451; X86-NOBMI-NEXT: .LBB21_1: 1452; X86-NOBMI-NEXT: movl %esi, %edx 1453; X86-NOBMI-NEXT: .LBB21_3: 1454; X86-NOBMI-NEXT: notl %edx 1455; X86-NOBMI-NEXT: notl %eax 1456; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1457; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1458; X86-NOBMI-NEXT: popl %esi 1459; X86-NOBMI-NEXT: retl 1460; 1461; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: 1462; X86-BMI1NOTBM: # %bb.0: 1463; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1464; X86-BMI1NOTBM-NEXT: movl $-1, %edx 1465; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1466; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1467; X86-BMI1NOTBM-NEXT: testb $32, %cl 1468; X86-BMI1NOTBM-NEXT: je .LBB21_2 
1469; X86-BMI1NOTBM-NEXT: # %bb.1: 1470; X86-BMI1NOTBM-NEXT: movl %eax, %edx 1471; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1472; X86-BMI1NOTBM-NEXT: .LBB21_2: 1473; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1474; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1475; X86-BMI1NOTBM-NEXT: retl 1476; 1477; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: 1478; X86-BMI1BMI2: # %bb.0: 1479; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl 1480; X86-BMI1BMI2-NEXT: movl $-1, %ecx 1481; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax 1482; X86-BMI1BMI2-NEXT: testb $32, %dl 1483; X86-BMI1BMI2-NEXT: je .LBB21_2 1484; X86-BMI1BMI2-NEXT: # %bb.1: 1485; X86-BMI1BMI2-NEXT: movl %eax, %ecx 1486; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1487; X86-BMI1BMI2-NEXT: .LBB21_2: 1488; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1489; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1490; X86-BMI1BMI2-NEXT: retl 1491; 1492; X64-NOBMI-LABEL: bzhi64_b1_indexzext: 1493; X64-NOBMI: # %bb.0: 1494; X64-NOBMI-NEXT: movl %esi, %ecx 1495; X64-NOBMI-NEXT: movq $-1, %rax 1496; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1497; X64-NOBMI-NEXT: shlq %cl, %rax 1498; X64-NOBMI-NEXT: notq %rax 1499; X64-NOBMI-NEXT: andq %rdi, %rax 1500; X64-NOBMI-NEXT: retq 1501; 1502; X64-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: 1503; X64-BMI1NOTBM: # %bb.0: 1504; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi 1505; X64-BMI1NOTBM-NEXT: shll $8, %esi 1506; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 1507; X64-BMI1NOTBM-NEXT: retq 1508; 1509; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext: 1510; X64-BMI1BMI2: # %bb.0: 1511; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 1512; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1513; X64-BMI1BMI2-NEXT: retq 1514 %conv = zext i8 %numlowbits to i64 1515 %notmask = shl i64 -1, %conv 1516 %mask = xor i64 %notmask, -1 1517 %masked = and i64 %mask, %val 1518 ret i64 %masked 1519} 1520 1521define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { 1522; 
X86-NOBMI-LABEL: bzhi64_b2_load: 1523; X86-NOBMI: # %bb.0: 1524; X86-NOBMI-NEXT: pushl %edi 1525; X86-NOBMI-NEXT: pushl %esi 1526; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1527; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1528; X86-NOBMI-NEXT: movl $-1, %edx 1529; X86-NOBMI-NEXT: movl $-1, %edi 1530; X86-NOBMI-NEXT: shll %cl, %edi 1531; X86-NOBMI-NEXT: xorl %eax, %eax 1532; X86-NOBMI-NEXT: testb $32, %cl 1533; X86-NOBMI-NEXT: jne .LBB22_1 1534; X86-NOBMI-NEXT: # %bb.2: 1535; X86-NOBMI-NEXT: movl %edi, %eax 1536; X86-NOBMI-NEXT: jmp .LBB22_3 1537; X86-NOBMI-NEXT: .LBB22_1: 1538; X86-NOBMI-NEXT: movl %edi, %edx 1539; X86-NOBMI-NEXT: .LBB22_3: 1540; X86-NOBMI-NEXT: notl %edx 1541; X86-NOBMI-NEXT: notl %eax 1542; X86-NOBMI-NEXT: andl (%esi), %eax 1543; X86-NOBMI-NEXT: andl 4(%esi), %edx 1544; X86-NOBMI-NEXT: popl %esi 1545; X86-NOBMI-NEXT: popl %edi 1546; X86-NOBMI-NEXT: retl 1547; 1548; X86-BMI1NOTBM-LABEL: bzhi64_b2_load: 1549; X86-BMI1NOTBM: # %bb.0: 1550; X86-BMI1NOTBM-NEXT: pushl %esi 1551; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx 1552; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1553; X86-BMI1NOTBM-NEXT: movl $-1, %esi 1554; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1555; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1556; X86-BMI1NOTBM-NEXT: testb $32, %cl 1557; X86-BMI1NOTBM-NEXT: je .LBB22_2 1558; X86-BMI1NOTBM-NEXT: # %bb.1: 1559; X86-BMI1NOTBM-NEXT: movl %eax, %esi 1560; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1561; X86-BMI1NOTBM-NEXT: .LBB22_2: 1562; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax 1563; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx 1564; X86-BMI1NOTBM-NEXT: popl %esi 1565; X86-BMI1NOTBM-NEXT: retl 1566; 1567; X86-BMI1BMI2-LABEL: bzhi64_b2_load: 1568; X86-BMI1BMI2: # %bb.0: 1569; X86-BMI1BMI2-NEXT: pushl %ebx 1570; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1571; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 1572; X86-BMI1BMI2-NEXT: movl $-1, %edx 1573; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax 1574; X86-BMI1BMI2-NEXT: testb $32, %bl 1575; 
X86-BMI1BMI2-NEXT: je .LBB22_2 1576; X86-BMI1BMI2-NEXT: # %bb.1: 1577; X86-BMI1BMI2-NEXT: movl %eax, %edx 1578; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1579; X86-BMI1BMI2-NEXT: .LBB22_2: 1580; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax 1581; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx 1582; X86-BMI1BMI2-NEXT: popl %ebx 1583; X86-BMI1BMI2-NEXT: retl 1584; 1585; X64-NOBMI-LABEL: bzhi64_b2_load: 1586; X64-NOBMI: # %bb.0: 1587; X64-NOBMI-NEXT: movq %rsi, %rcx 1588; X64-NOBMI-NEXT: movq $-1, %rax 1589; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1590; X64-NOBMI-NEXT: shlq %cl, %rax 1591; X64-NOBMI-NEXT: notq %rax 1592; X64-NOBMI-NEXT: andq (%rdi), %rax 1593; X64-NOBMI-NEXT: retq 1594; 1595; X64-BMI1NOTBM-LABEL: bzhi64_b2_load: 1596; X64-BMI1NOTBM: # %bb.0: 1597; X64-BMI1NOTBM-NEXT: shll $8, %esi 1598; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax 1599; X64-BMI1NOTBM-NEXT: retq 1600; 1601; X64-BMI1BMI2-LABEL: bzhi64_b2_load: 1602; X64-BMI1BMI2: # %bb.0: 1603; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1604; X64-BMI1BMI2-NEXT: retq 1605 %val = load i64, i64* %w 1606 %notmask = shl i64 -1, %numlowbits 1607 %mask = xor i64 %notmask, -1 1608 %masked = and i64 %mask, %val 1609 ret i64 %masked 1610} 1611 1612define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { 1613; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext: 1614; X86-NOBMI: # %bb.0: 1615; X86-NOBMI-NEXT: pushl %edi 1616; X86-NOBMI-NEXT: pushl %esi 1617; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1618; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1619; X86-NOBMI-NEXT: movl $-1, %edx 1620; X86-NOBMI-NEXT: movl $-1, %edi 1621; X86-NOBMI-NEXT: shll %cl, %edi 1622; X86-NOBMI-NEXT: xorl %eax, %eax 1623; X86-NOBMI-NEXT: testb $32, %cl 1624; X86-NOBMI-NEXT: jne .LBB23_1 1625; X86-NOBMI-NEXT: # %bb.2: 1626; X86-NOBMI-NEXT: movl %edi, %eax 1627; X86-NOBMI-NEXT: jmp .LBB23_3 1628; X86-NOBMI-NEXT: .LBB23_1: 1629; X86-NOBMI-NEXT: movl %edi, %edx 1630; X86-NOBMI-NEXT: .LBB23_3: 1631; X86-NOBMI-NEXT: notl 
%edx 1632; X86-NOBMI-NEXT: notl %eax 1633; X86-NOBMI-NEXT: andl (%esi), %eax 1634; X86-NOBMI-NEXT: andl 4(%esi), %edx 1635; X86-NOBMI-NEXT: popl %esi 1636; X86-NOBMI-NEXT: popl %edi 1637; X86-NOBMI-NEXT: retl 1638; 1639; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: 1640; X86-BMI1NOTBM: # %bb.0: 1641; X86-BMI1NOTBM-NEXT: pushl %esi 1642; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx 1643; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1644; X86-BMI1NOTBM-NEXT: movl $-1, %esi 1645; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1646; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1647; X86-BMI1NOTBM-NEXT: testb $32, %cl 1648; X86-BMI1NOTBM-NEXT: je .LBB23_2 1649; X86-BMI1NOTBM-NEXT: # %bb.1: 1650; X86-BMI1NOTBM-NEXT: movl %eax, %esi 1651; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1652; X86-BMI1NOTBM-NEXT: .LBB23_2: 1653; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax 1654; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx 1655; X86-BMI1NOTBM-NEXT: popl %esi 1656; X86-BMI1NOTBM-NEXT: retl 1657; 1658; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: 1659; X86-BMI1BMI2: # %bb.0: 1660; X86-BMI1BMI2-NEXT: pushl %ebx 1661; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1662; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 1663; X86-BMI1BMI2-NEXT: movl $-1, %edx 1664; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax 1665; X86-BMI1BMI2-NEXT: testb $32, %bl 1666; X86-BMI1BMI2-NEXT: je .LBB23_2 1667; X86-BMI1BMI2-NEXT: # %bb.1: 1668; X86-BMI1BMI2-NEXT: movl %eax, %edx 1669; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1670; X86-BMI1BMI2-NEXT: .LBB23_2: 1671; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax 1672; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx 1673; X86-BMI1BMI2-NEXT: popl %ebx 1674; X86-BMI1BMI2-NEXT: retl 1675; 1676; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: 1677; X64-NOBMI: # %bb.0: 1678; X64-NOBMI-NEXT: movl %esi, %ecx 1679; X64-NOBMI-NEXT: movq $-1, %rax 1680; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1681; X64-NOBMI-NEXT: shlq %cl, %rax 1682; X64-NOBMI-NEXT: notq %rax 1683; X64-NOBMI-NEXT: andq (%rdi), 
%rax 1684; X64-NOBMI-NEXT: retq 1685; 1686; X64-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: 1687; X64-BMI1NOTBM: # %bb.0: 1688; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi 1689; X64-BMI1NOTBM-NEXT: shll $8, %esi 1690; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax 1691; X64-BMI1NOTBM-NEXT: retq 1692; 1693; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: 1694; X64-BMI1BMI2: # %bb.0: 1695; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 1696; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1697; X64-BMI1BMI2-NEXT: retq 1698 %val = load i64, i64* %w 1699 %conv = zext i8 %numlowbits to i64 1700 %notmask = shl i64 -1, %conv 1701 %mask = xor i64 %notmask, -1 1702 %masked = and i64 %mask, %val 1703 ret i64 %masked 1704} 1705 1706define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { 1707; X86-NOBMI-LABEL: bzhi64_b4_commutative: 1708; X86-NOBMI: # %bb.0: 1709; X86-NOBMI-NEXT: pushl %esi 1710; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1711; X86-NOBMI-NEXT: movl $-1, %edx 1712; X86-NOBMI-NEXT: movl $-1, %esi 1713; X86-NOBMI-NEXT: shll %cl, %esi 1714; X86-NOBMI-NEXT: xorl %eax, %eax 1715; X86-NOBMI-NEXT: testb $32, %cl 1716; X86-NOBMI-NEXT: jne .LBB24_1 1717; X86-NOBMI-NEXT: # %bb.2: 1718; X86-NOBMI-NEXT: movl %esi, %eax 1719; X86-NOBMI-NEXT: jmp .LBB24_3 1720; X86-NOBMI-NEXT: .LBB24_1: 1721; X86-NOBMI-NEXT: movl %esi, %edx 1722; X86-NOBMI-NEXT: .LBB24_3: 1723; X86-NOBMI-NEXT: notl %edx 1724; X86-NOBMI-NEXT: notl %eax 1725; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1726; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1727; X86-NOBMI-NEXT: popl %esi 1728; X86-NOBMI-NEXT: retl 1729; 1730; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: 1731; X86-BMI1NOTBM: # %bb.0: 1732; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1733; X86-BMI1NOTBM-NEXT: movl $-1, %edx 1734; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1735; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1736; X86-BMI1NOTBM-NEXT: testb $32, %cl 1737; X86-BMI1NOTBM-NEXT: je .LBB24_2 1738; X86-BMI1NOTBM-NEXT: # %bb.1: 
1739; X86-BMI1NOTBM-NEXT: movl %eax, %edx 1740; X86-BMI1NOTBM-NEXT: xorl %eax, %eax 1741; X86-BMI1NOTBM-NEXT: .LBB24_2: 1742; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1743; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1744; X86-BMI1NOTBM-NEXT: retl 1745; 1746; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: 1747; X86-BMI1BMI2: # %bb.0: 1748; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl 1749; X86-BMI1BMI2-NEXT: movl $-1, %ecx 1750; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax 1751; X86-BMI1BMI2-NEXT: testb $32, %dl 1752; X86-BMI1BMI2-NEXT: je .LBB24_2 1753; X86-BMI1BMI2-NEXT: # %bb.1: 1754; X86-BMI1BMI2-NEXT: movl %eax, %ecx 1755; X86-BMI1BMI2-NEXT: xorl %eax, %eax 1756; X86-BMI1BMI2-NEXT: .LBB24_2: 1757; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1758; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1759; X86-BMI1BMI2-NEXT: retl 1760; 1761; X64-NOBMI-LABEL: bzhi64_b4_commutative: 1762; X64-NOBMI: # %bb.0: 1763; X64-NOBMI-NEXT: movq %rsi, %rcx 1764; X64-NOBMI-NEXT: movq $-1, %rax 1765; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1766; X64-NOBMI-NEXT: shlq %cl, %rax 1767; X64-NOBMI-NEXT: notq %rax 1768; X64-NOBMI-NEXT: andq %rdi, %rax 1769; X64-NOBMI-NEXT: retq 1770; 1771; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative: 1772; X64-BMI1NOTBM: # %bb.0: 1773; X64-BMI1NOTBM-NEXT: shll $8, %esi 1774; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax 1775; X64-BMI1NOTBM-NEXT: retq 1776; 1777; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative: 1778; X64-BMI1BMI2: # %bb.0: 1779; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1780; X64-BMI1BMI2-NEXT: retq 1781 %notmask = shl i64 -1, %numlowbits 1782 %mask = xor i64 %notmask, -1 1783 %masked = and i64 %val, %mask ; swapped order 1784 ret i64 %masked 1785} 1786 1787; 64-bit, but with 32-bit output 1788 1789; Everything done in 64-bit, truncation happens last. 
1790define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { 1791; X86-NOBMI-LABEL: bzhi64_32_b0: 1792; X86-NOBMI: # %bb.0: 1793; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1794; X86-NOBMI-NEXT: movl $-1, %edx 1795; X86-NOBMI-NEXT: shll %cl, %edx 1796; X86-NOBMI-NEXT: xorl %eax, %eax 1797; X86-NOBMI-NEXT: testb $32, %cl 1798; X86-NOBMI-NEXT: jne .LBB25_2 1799; X86-NOBMI-NEXT: # %bb.1: 1800; X86-NOBMI-NEXT: movl %edx, %eax 1801; X86-NOBMI-NEXT: .LBB25_2: 1802; X86-NOBMI-NEXT: notl %eax 1803; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1804; X86-NOBMI-NEXT: retl 1805; 1806; X86-BMI1NOTBM-LABEL: bzhi64_32_b0: 1807; X86-BMI1NOTBM: # %bb.0: 1808; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1809; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1810; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1811; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 1812; X86-BMI1NOTBM-NEXT: testb $32, %cl 1813; X86-BMI1NOTBM-NEXT: jne .LBB25_2 1814; X86-BMI1NOTBM-NEXT: # %bb.1: 1815; X86-BMI1NOTBM-NEXT: movl %eax, %edx 1816; X86-BMI1NOTBM-NEXT: .LBB25_2: 1817; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax 1818; X86-BMI1NOTBM-NEXT: retl 1819; 1820; X86-BMI1BMI2-LABEL: bzhi64_32_b0: 1821; X86-BMI1BMI2: # %bb.0: 1822; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1823; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx 1824; X86-BMI1BMI2-NEXT: testb $32, %al 1825; X86-BMI1BMI2-NEXT: jne .LBB25_2 1826; X86-BMI1BMI2-NEXT: # %bb.1: 1827; X86-BMI1BMI2-NEXT: movl $-1, %ecx 1828; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx 1829; X86-BMI1BMI2-NEXT: .LBB25_2: 1830; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax 1831; X86-BMI1BMI2-NEXT: retl 1832; 1833; X64-NOBMI-LABEL: bzhi64_32_b0: 1834; X64-NOBMI: # %bb.0: 1835; X64-NOBMI-NEXT: movl %esi, %ecx 1836; X64-NOBMI-NEXT: movq $-1, %rax 1837; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1838; X64-NOBMI-NEXT: shlq %cl, %rax 1839; X64-NOBMI-NEXT: notl %eax 1840; X64-NOBMI-NEXT: andl %edi, %eax 1841; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 1842; 
X64-NOBMI-NEXT: retq 1843; 1844; X64-BMI1NOTBM-LABEL: bzhi64_32_b0: 1845; X64-BMI1NOTBM: # %bb.0: 1846; X64-BMI1NOTBM-NEXT: shll $8, %esi 1847; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1848; X64-BMI1NOTBM-NEXT: retq 1849; 1850; X64-BMI1BMI2-LABEL: bzhi64_32_b0: 1851; X64-BMI1BMI2: # %bb.0: 1852; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1853; X64-BMI1BMI2-NEXT: retq 1854 %widenumlowbits = zext i8 %numlowbits to i64 1855 %notmask = shl nsw i64 -1, %widenumlowbits 1856 %mask = xor i64 %notmask, -1 1857 %wideres = and i64 %val, %mask 1858 %res = trunc i64 %wideres to i32 1859 ret i32 %res 1860} 1861 1862; The 64-bit value is truncated to 32-bit first. Shifting and masking are 32-bit. 1863define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { 1864; X86-NOBMI-LABEL: bzhi64_32_b1: 1865; X86-NOBMI: # %bb.0: 1866; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1867; X86-NOBMI-NEXT: movl $-1, %eax 1868; X86-NOBMI-NEXT: shll %cl, %eax 1869; X86-NOBMI-NEXT: notl %eax 1870; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1871; X86-NOBMI-NEXT: retl 1872; 1873; X86-BMI1NOTBM-LABEL: bzhi64_32_b1: 1874; X86-BMI1NOTBM: # %bb.0: 1875; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 1876; X86-BMI1NOTBM-NEXT: shll $8, %eax 1877; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1878; X86-BMI1NOTBM-NEXT: retl 1879; 1880; X86-BMI1BMI2-LABEL: bzhi64_32_b1: 1881; X86-BMI1BMI2: # %bb.0: 1882; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1883; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1884; X86-BMI1BMI2-NEXT: retl 1885; 1886; X64-NOBMI-LABEL: bzhi64_32_b1: 1887; X64-NOBMI: # %bb.0: 1888; X64-NOBMI-NEXT: movl %esi, %ecx 1889; X64-NOBMI-NEXT: movl $-1, %eax 1890; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1891; X64-NOBMI-NEXT: shll %cl, %eax 1892; X64-NOBMI-NEXT: notl %eax 1893; X64-NOBMI-NEXT: andl %edi, %eax 1894; X64-NOBMI-NEXT: retq 1895; 1896; X64-BMI1NOTBM-LABEL: bzhi64_32_b1: 1897; X64-BMI1NOTBM: # %bb.0: 1898; X64-BMI1NOTBM-NEXT: shll $8, %esi 1899; X64-BMI1NOTBM-NEXT: 
bextrl %esi, %edi, %eax 1900; X64-BMI1NOTBM-NEXT: retq 1901; 1902; X64-BMI1BMI2-LABEL: bzhi64_32_b1: 1903; X64-BMI1BMI2: # %bb.0: 1904; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1905; X64-BMI1BMI2-NEXT: retq 1906 %truncval = trunc i64 %val to i32 1907 %widenumlowbits = zext i8 %numlowbits to i32 1908 %notmask = shl nsw i32 -1, %widenumlowbits 1909 %mask = xor i32 %notmask, -1 1910 %res = and i32 %truncval, %mask 1911 ret i32 %res 1912} 1913 1914; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit. 1915; Masking is 64-bit. Then truncation. 1916define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { 1917; X86-NOBMI-LABEL: bzhi64_32_b2: 1918; X86-NOBMI: # %bb.0: 1919; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1920; X86-NOBMI-NEXT: movl $-1, %eax 1921; X86-NOBMI-NEXT: shll %cl, %eax 1922; X86-NOBMI-NEXT: notl %eax 1923; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1924; X86-NOBMI-NEXT: retl 1925; 1926; X86-BMI1NOTBM-LABEL: bzhi64_32_b2: 1927; X86-BMI1NOTBM: # %bb.0: 1928; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 1929; X86-BMI1NOTBM-NEXT: shll $8, %eax 1930; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1931; X86-BMI1NOTBM-NEXT: retl 1932; 1933; X86-BMI1BMI2-LABEL: bzhi64_32_b2: 1934; X86-BMI1BMI2: # %bb.0: 1935; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1936; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1937; X86-BMI1BMI2-NEXT: retl 1938; 1939; X64-NOBMI-LABEL: bzhi64_32_b2: 1940; X64-NOBMI: # %bb.0: 1941; X64-NOBMI-NEXT: movl %esi, %ecx 1942; X64-NOBMI-NEXT: movl $-1, %eax 1943; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1944; X64-NOBMI-NEXT: shll %cl, %eax 1945; X64-NOBMI-NEXT: notl %eax 1946; X64-NOBMI-NEXT: andl %edi, %eax 1947; X64-NOBMI-NEXT: retq 1948; 1949; X64-BMI1NOTBM-LABEL: bzhi64_32_b2: 1950; X64-BMI1NOTBM: # %bb.0: 1951; X64-BMI1NOTBM-NEXT: shll $8, %esi 1952; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 1953; X64-BMI1NOTBM-NEXT: retq 1954; 1955; X64-BMI1BMI2-LABEL: bzhi64_32_b2: 1956; 
X64-BMI1BMI2: # %bb.0: 1957; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1958; X64-BMI1BMI2-NEXT: retq 1959 %widenumlowbits = zext i8 %numlowbits to i32 1960 %notmask = shl nsw i32 -1, %widenumlowbits 1961 %mask = xor i32 %notmask, -1 1962 %zextmask = zext i32 %mask to i64 1963 %wideres = and i64 %val, %zextmask 1964 %res = trunc i64 %wideres to i32 1965 ret i32 %res 1966} 1967 1968; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit. 1969; Masking is 64-bit. Then truncation. 1970define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { 1971; X86-NOBMI-LABEL: bzhi64_32_b3: 1972; X86-NOBMI: # %bb.0: 1973; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl 1974; X86-NOBMI-NEXT: movl $-1, %edx 1975; X86-NOBMI-NEXT: shll %cl, %edx 1976; X86-NOBMI-NEXT: xorl %eax, %eax 1977; X86-NOBMI-NEXT: testb $32, %cl 1978; X86-NOBMI-NEXT: jne .LBB28_2 1979; X86-NOBMI-NEXT: # %bb.1: 1980; X86-NOBMI-NEXT: movl %edx, %eax 1981; X86-NOBMI-NEXT: .LBB28_2: 1982; X86-NOBMI-NEXT: notl %eax 1983; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1984; X86-NOBMI-NEXT: retl 1985; 1986; X86-BMI1NOTBM-LABEL: bzhi64_32_b3: 1987; X86-BMI1NOTBM: # %bb.0: 1988; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl 1989; X86-BMI1NOTBM-NEXT: movl $-1, %eax 1990; X86-BMI1NOTBM-NEXT: shll %cl, %eax 1991; X86-BMI1NOTBM-NEXT: xorl %edx, %edx 1992; X86-BMI1NOTBM-NEXT: testb $32, %cl 1993; X86-BMI1NOTBM-NEXT: jne .LBB28_2 1994; X86-BMI1NOTBM-NEXT: # %bb.1: 1995; X86-BMI1NOTBM-NEXT: movl %eax, %edx 1996; X86-BMI1NOTBM-NEXT: .LBB28_2: 1997; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax 1998; X86-BMI1NOTBM-NEXT: retl 1999; 2000; X86-BMI1BMI2-LABEL: bzhi64_32_b3: 2001; X86-BMI1BMI2: # %bb.0: 2002; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 2003; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx 2004; X86-BMI1BMI2-NEXT: testb $32, %al 2005; X86-BMI1BMI2-NEXT: jne .LBB28_2 2006; X86-BMI1BMI2-NEXT: # %bb.1: 2007; X86-BMI1BMI2-NEXT: movl $-1, %ecx 2008; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx 2009; 
X86-BMI1BMI2-NEXT: .LBB28_2: 2010; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax 2011; X86-BMI1BMI2-NEXT: retl 2012; 2013; X64-NOBMI-LABEL: bzhi64_32_b3: 2014; X64-NOBMI: # %bb.0: 2015; X64-NOBMI-NEXT: movl %esi, %ecx 2016; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 2017; X64-NOBMI-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF 2018; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2019; X64-NOBMI-NEXT: shlq %cl, %rdx 2020; X64-NOBMI-NEXT: xorl %edx, %eax 2021; X64-NOBMI-NEXT: andl %edi, %eax 2022; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 2023; X64-NOBMI-NEXT: retq 2024; 2025; X64-BMI1NOTBM-LABEL: bzhi64_32_b3: 2026; X64-BMI1NOTBM: # %bb.0: 2027; X64-BMI1NOTBM-NEXT: shll $8, %esi 2028; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 2029; X64-BMI1NOTBM-NEXT: retq 2030; 2031; X64-BMI1BMI2-LABEL: bzhi64_32_b3: 2032; X64-BMI1BMI2: # %bb.0: 2033; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 2034; X64-BMI1BMI2-NEXT: retq 2035 %widenumlowbits = zext i8 %numlowbits to i64 2036 %notmask = shl nsw i64 4294967295, %widenumlowbits 2037 %mask = xor i64 %notmask, 4294967295 2038 %wideres = and i64 %val, %mask 2039 %res = trunc i64 %wideres to i32 2040 ret i32 %res 2041} 2042 2043; ---------------------------------------------------------------------------- ; 2044; Pattern c. 
32-bit 2045; ---------------------------------------------------------------------------- ; 2046 2047declare void @use32(i32) 2048 2049define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { 2050; X86-NOBMI-LABEL: bzhi32_c0: 2051; X86-NOBMI: # %bb.0: 2052; X86-NOBMI-NEXT: pushl %esi 2053; X86-NOBMI-NEXT: subl $8, %esp 2054; X86-NOBMI-NEXT: xorl %ecx, %ecx 2055; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2056; X86-NOBMI-NEXT: movl $-1, %esi 2057; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2058; X86-NOBMI-NEXT: shrl %cl, %esi 2059; X86-NOBMI-NEXT: movl %esi, (%esp) 2060; X86-NOBMI-NEXT: calll use32 2061; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 2062; X86-NOBMI-NEXT: movl %esi, %eax 2063; X86-NOBMI-NEXT: addl $8, %esp 2064; X86-NOBMI-NEXT: popl %esi 2065; X86-NOBMI-NEXT: retl 2066; 2067; X86-BMI1NOTBM-LABEL: bzhi32_c0: 2068; X86-BMI1NOTBM: # %bb.0: 2069; X86-BMI1NOTBM-NEXT: pushl %esi 2070; X86-BMI1NOTBM-NEXT: subl $8, %esp 2071; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx 2072; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2073; X86-BMI1NOTBM-NEXT: movl $-1, %esi 2074; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2075; X86-BMI1NOTBM-NEXT: shrl %cl, %esi 2076; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) 2077; X86-BMI1NOTBM-NEXT: calll use32 2078; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 2079; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2080; X86-BMI1NOTBM-NEXT: addl $8, %esp 2081; X86-BMI1NOTBM-NEXT: popl %esi 2082; X86-BMI1NOTBM-NEXT: retl 2083; 2084; X86-BMI1BMI2-LABEL: bzhi32_c0: 2085; X86-BMI1BMI2: # %bb.0: 2086; X86-BMI1BMI2-NEXT: pushl %ebx 2087; X86-BMI1BMI2-NEXT: subl $8, %esp 2088; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 2089; X86-BMI1BMI2-NEXT: movl %ebx, %eax 2090; X86-BMI1BMI2-NEXT: negb %al 2091; X86-BMI1BMI2-NEXT: movl $-1, %ecx 2092; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax 2093; X86-BMI1BMI2-NEXT: movl %eax, (%esp) 2094; X86-BMI1BMI2-NEXT: calll use32 2095; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax 2096; 
X86-BMI1BMI2-NEXT: addl $8, %esp 2097; X86-BMI1BMI2-NEXT: popl %ebx 2098; X86-BMI1BMI2-NEXT: retl 2099; 2100; X64-NOBMI-LABEL: bzhi32_c0: 2101; X64-NOBMI: # %bb.0: 2102; X64-NOBMI-NEXT: pushq %rbp 2103; X64-NOBMI-NEXT: pushq %rbx 2104; X64-NOBMI-NEXT: pushq %rax 2105; X64-NOBMI-NEXT: movl %esi, %ecx 2106; X64-NOBMI-NEXT: movl %edi, %ebx 2107; X64-NOBMI-NEXT: negb %cl 2108; X64-NOBMI-NEXT: movl $-1, %ebp 2109; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2110; X64-NOBMI-NEXT: shrl %cl, %ebp 2111; X64-NOBMI-NEXT: movl %ebp, %edi 2112; X64-NOBMI-NEXT: callq use32 2113; X64-NOBMI-NEXT: andl %ebx, %ebp 2114; X64-NOBMI-NEXT: movl %ebp, %eax 2115; X64-NOBMI-NEXT: addq $8, %rsp 2116; X64-NOBMI-NEXT: popq %rbx 2117; X64-NOBMI-NEXT: popq %rbp 2118; X64-NOBMI-NEXT: retq 2119; 2120; X64-BMI1NOTBM-LABEL: bzhi32_c0: 2121; X64-BMI1NOTBM: # %bb.0: 2122; X64-BMI1NOTBM-NEXT: pushq %rbp 2123; X64-BMI1NOTBM-NEXT: pushq %rbx 2124; X64-BMI1NOTBM-NEXT: pushq %rax 2125; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2126; X64-BMI1NOTBM-NEXT: movl %edi, %ebx 2127; X64-BMI1NOTBM-NEXT: negb %cl 2128; X64-BMI1NOTBM-NEXT: movl $-1, %ebp 2129; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2130; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp 2131; X64-BMI1NOTBM-NEXT: movl %ebp, %edi 2132; X64-BMI1NOTBM-NEXT: callq use32 2133; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp 2134; X64-BMI1NOTBM-NEXT: movl %ebp, %eax 2135; X64-BMI1NOTBM-NEXT: addq $8, %rsp 2136; X64-BMI1NOTBM-NEXT: popq %rbx 2137; X64-BMI1NOTBM-NEXT: popq %rbp 2138; X64-BMI1NOTBM-NEXT: retq 2139; 2140; X64-BMI1BMI2-LABEL: bzhi32_c0: 2141; X64-BMI1BMI2: # %bb.0: 2142; X64-BMI1BMI2-NEXT: pushq %rbp 2143; X64-BMI1BMI2-NEXT: pushq %rbx 2144; X64-BMI1BMI2-NEXT: pushq %rax 2145; X64-BMI1BMI2-NEXT: movl %esi, %ebx 2146; X64-BMI1BMI2-NEXT: movl %edi, %ebp 2147; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2148; X64-BMI1BMI2-NEXT: negb %al 2149; X64-BMI1BMI2-NEXT: movl $-1, %ecx 2150; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi 2151; X64-BMI1BMI2-NEXT: callq use32 
2152; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax 2153; X64-BMI1BMI2-NEXT: addq $8, %rsp 2154; X64-BMI1BMI2-NEXT: popq %rbx 2155; X64-BMI1BMI2-NEXT: popq %rbp 2156; X64-BMI1BMI2-NEXT: retq 2157 %numhighbits = sub i32 32, %numlowbits 2158 %mask = lshr i32 -1, %numhighbits 2159 call void @use32(i32 %mask) 2160 %masked = and i32 %mask, %val 2161 ret i32 %masked 2162} 2163 2164define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { 2165; X86-NOBMI-LABEL: bzhi32_c1_indexzext: 2166; X86-NOBMI: # %bb.0: 2167; X86-NOBMI-NEXT: pushl %esi 2168; X86-NOBMI-NEXT: subl $8, %esp 2169; X86-NOBMI-NEXT: xorl %ecx, %ecx 2170; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2171; X86-NOBMI-NEXT: movl $-1, %esi 2172; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2173; X86-NOBMI-NEXT: shrl %cl, %esi 2174; X86-NOBMI-NEXT: movl %esi, (%esp) 2175; X86-NOBMI-NEXT: calll use32 2176; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 2177; X86-NOBMI-NEXT: movl %esi, %eax 2178; X86-NOBMI-NEXT: addl $8, %esp 2179; X86-NOBMI-NEXT: popl %esi 2180; X86-NOBMI-NEXT: retl 2181; 2182; X86-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: 2183; X86-BMI1NOTBM: # %bb.0: 2184; X86-BMI1NOTBM-NEXT: pushl %esi 2185; X86-BMI1NOTBM-NEXT: subl $8, %esp 2186; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx 2187; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2188; X86-BMI1NOTBM-NEXT: movl $-1, %esi 2189; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2190; X86-BMI1NOTBM-NEXT: shrl %cl, %esi 2191; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) 2192; X86-BMI1NOTBM-NEXT: calll use32 2193; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 2194; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2195; X86-BMI1NOTBM-NEXT: addl $8, %esp 2196; X86-BMI1NOTBM-NEXT: popl %esi 2197; X86-BMI1NOTBM-NEXT: retl 2198; 2199; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext: 2200; X86-BMI1BMI2: # %bb.0: 2201; X86-BMI1BMI2-NEXT: pushl %ebx 2202; X86-BMI1BMI2-NEXT: subl $8, %esp 2203; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 2204; X86-BMI1BMI2-NEXT: movl %ebx, %eax 
2205; X86-BMI1BMI2-NEXT: negb %al 2206; X86-BMI1BMI2-NEXT: movl $-1, %ecx 2207; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax 2208; X86-BMI1BMI2-NEXT: movl %eax, (%esp) 2209; X86-BMI1BMI2-NEXT: calll use32 2210; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax 2211; X86-BMI1BMI2-NEXT: addl $8, %esp 2212; X86-BMI1BMI2-NEXT: popl %ebx 2213; X86-BMI1BMI2-NEXT: retl 2214; 2215; X64-NOBMI-LABEL: bzhi32_c1_indexzext: 2216; X64-NOBMI: # %bb.0: 2217; X64-NOBMI-NEXT: pushq %rbp 2218; X64-NOBMI-NEXT: pushq %rbx 2219; X64-NOBMI-NEXT: pushq %rax 2220; X64-NOBMI-NEXT: movl %esi, %ecx 2221; X64-NOBMI-NEXT: movl %edi, %ebx 2222; X64-NOBMI-NEXT: negb %cl 2223; X64-NOBMI-NEXT: movl $-1, %ebp 2224; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2225; X64-NOBMI-NEXT: shrl %cl, %ebp 2226; X64-NOBMI-NEXT: movl %ebp, %edi 2227; X64-NOBMI-NEXT: callq use32 2228; X64-NOBMI-NEXT: andl %ebx, %ebp 2229; X64-NOBMI-NEXT: movl %ebp, %eax 2230; X64-NOBMI-NEXT: addq $8, %rsp 2231; X64-NOBMI-NEXT: popq %rbx 2232; X64-NOBMI-NEXT: popq %rbp 2233; X64-NOBMI-NEXT: retq 2234; 2235; X64-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: 2236; X64-BMI1NOTBM: # %bb.0: 2237; X64-BMI1NOTBM-NEXT: pushq %rbp 2238; X64-BMI1NOTBM-NEXT: pushq %rbx 2239; X64-BMI1NOTBM-NEXT: pushq %rax 2240; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2241; X64-BMI1NOTBM-NEXT: movl %edi, %ebx 2242; X64-BMI1NOTBM-NEXT: negb %cl 2243; X64-BMI1NOTBM-NEXT: movl $-1, %ebp 2244; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2245; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp 2246; X64-BMI1NOTBM-NEXT: movl %ebp, %edi 2247; X64-BMI1NOTBM-NEXT: callq use32 2248; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp 2249; X64-BMI1NOTBM-NEXT: movl %ebp, %eax 2250; X64-BMI1NOTBM-NEXT: addq $8, %rsp 2251; X64-BMI1NOTBM-NEXT: popq %rbx 2252; X64-BMI1NOTBM-NEXT: popq %rbp 2253; X64-BMI1NOTBM-NEXT: retq 2254; 2255; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext: 2256; X64-BMI1BMI2: # %bb.0: 2257; X64-BMI1BMI2-NEXT: pushq %rbp 2258; X64-BMI1BMI2-NEXT: pushq %rbx 2259; 
X64-BMI1BMI2-NEXT: pushq %rax 2260; X64-BMI1BMI2-NEXT: movl %esi, %ebx 2261; X64-BMI1BMI2-NEXT: movl %edi, %ebp 2262; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2263; X64-BMI1BMI2-NEXT: negb %al 2264; X64-BMI1BMI2-NEXT: movl $-1, %ecx 2265; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi 2266; X64-BMI1BMI2-NEXT: callq use32 2267; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax 2268; X64-BMI1BMI2-NEXT: addq $8, %rsp 2269; X64-BMI1BMI2-NEXT: popq %rbx 2270; X64-BMI1BMI2-NEXT: popq %rbp 2271; X64-BMI1BMI2-NEXT: retq 2272 %numhighbits = sub i8 32, %numlowbits 2273 %sh_prom = zext i8 %numhighbits to i32 2274 %mask = lshr i32 -1, %sh_prom 2275 call void @use32(i32 %mask) 2276 %masked = and i32 %mask, %val 2277 ret i32 %masked 2278} 2279 2280define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { 2281; X86-NOBMI-LABEL: bzhi32_c2_load: 2282; X86-NOBMI: # %bb.0: 2283; X86-NOBMI-NEXT: pushl %esi 2284; X86-NOBMI-NEXT: subl $8, %esp 2285; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2286; X86-NOBMI-NEXT: xorl %ecx, %ecx 2287; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2288; X86-NOBMI-NEXT: movl $-1, %edx 2289; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2290; X86-NOBMI-NEXT: shrl %cl, %edx 2291; X86-NOBMI-NEXT: movl (%eax), %esi 2292; X86-NOBMI-NEXT: andl %edx, %esi 2293; X86-NOBMI-NEXT: movl %edx, (%esp) 2294; X86-NOBMI-NEXT: calll use32 2295; X86-NOBMI-NEXT: movl %esi, %eax 2296; X86-NOBMI-NEXT: addl $8, %esp 2297; X86-NOBMI-NEXT: popl %esi 2298; X86-NOBMI-NEXT: retl 2299; 2300; X86-BMI1NOTBM-LABEL: bzhi32_c2_load: 2301; X86-BMI1NOTBM: # %bb.0: 2302; X86-BMI1NOTBM-NEXT: pushl %esi 2303; X86-BMI1NOTBM-NEXT: subl $8, %esp 2304; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 2305; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx 2306; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2307; X86-BMI1NOTBM-NEXT: movl $-1, %edx 2308; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2309; X86-BMI1NOTBM-NEXT: shrl %cl, %edx 2310; X86-BMI1NOTBM-NEXT: movl (%eax), %esi 2311; 
X86-BMI1NOTBM-NEXT: andl %edx, %esi 2312; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) 2313; X86-BMI1NOTBM-NEXT: calll use32 2314; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2315; X86-BMI1NOTBM-NEXT: addl $8, %esp 2316; X86-BMI1NOTBM-NEXT: popl %esi 2317; X86-BMI1NOTBM-NEXT: retl 2318; 2319; X86-BMI1BMI2-LABEL: bzhi32_c2_load: 2320; X86-BMI1BMI2: # %bb.0: 2321; X86-BMI1BMI2-NEXT: pushl %esi 2322; X86-BMI1BMI2-NEXT: subl $8, %esp 2323; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2324; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 2325; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi 2326; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx 2327; X86-BMI1BMI2-NEXT: negb %cl 2328; X86-BMI1BMI2-NEXT: movl $-1, %eax 2329; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax 2330; X86-BMI1BMI2-NEXT: movl %eax, (%esp) 2331; X86-BMI1BMI2-NEXT: calll use32 2332; X86-BMI1BMI2-NEXT: movl %esi, %eax 2333; X86-BMI1BMI2-NEXT: addl $8, %esp 2334; X86-BMI1BMI2-NEXT: popl %esi 2335; X86-BMI1BMI2-NEXT: retl 2336; 2337; X64-NOBMI-LABEL: bzhi32_c2_load: 2338; X64-NOBMI: # %bb.0: 2339; X64-NOBMI-NEXT: pushq %rbx 2340; X64-NOBMI-NEXT: movl %esi, %ecx 2341; X64-NOBMI-NEXT: negb %cl 2342; X64-NOBMI-NEXT: movl $-1, %eax 2343; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2344; X64-NOBMI-NEXT: shrl %cl, %eax 2345; X64-NOBMI-NEXT: movl (%rdi), %ebx 2346; X64-NOBMI-NEXT: andl %eax, %ebx 2347; X64-NOBMI-NEXT: movl %eax, %edi 2348; X64-NOBMI-NEXT: callq use32 2349; X64-NOBMI-NEXT: movl %ebx, %eax 2350; X64-NOBMI-NEXT: popq %rbx 2351; X64-NOBMI-NEXT: retq 2352; 2353; X64-BMI1NOTBM-LABEL: bzhi32_c2_load: 2354; X64-BMI1NOTBM: # %bb.0: 2355; X64-BMI1NOTBM-NEXT: pushq %rbx 2356; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2357; X64-BMI1NOTBM-NEXT: negb %cl 2358; X64-BMI1NOTBM-NEXT: movl $-1, %eax 2359; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2360; X64-BMI1NOTBM-NEXT: shrl %cl, %eax 2361; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx 2362; X64-BMI1NOTBM-NEXT: andl %eax, %ebx 2363; 
X64-BMI1NOTBM-NEXT: movl %eax, %edi 2364; X64-BMI1NOTBM-NEXT: callq use32 2365; X64-BMI1NOTBM-NEXT: movl %ebx, %eax 2366; X64-BMI1NOTBM-NEXT: popq %rbx 2367; X64-BMI1NOTBM-NEXT: retq 2368; 2369; X64-BMI1BMI2-LABEL: bzhi32_c2_load: 2370; X64-BMI1BMI2: # %bb.0: 2371; X64-BMI1BMI2-NEXT: pushq %rbx 2372; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx 2373; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2374; X64-BMI1BMI2-NEXT: negb %sil 2375; X64-BMI1BMI2-NEXT: movl $-1, %eax 2376; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi 2377; X64-BMI1BMI2-NEXT: callq use32 2378; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2379; X64-BMI1BMI2-NEXT: popq %rbx 2380; X64-BMI1BMI2-NEXT: retq 2381 %val = load i32, i32* %w 2382 %numhighbits = sub i32 32, %numlowbits 2383 %mask = lshr i32 -1, %numhighbits 2384 call void @use32(i32 %mask) 2385 %masked = and i32 %mask, %val 2386 ret i32 %masked 2387} 2388 2389define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { 2390; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2391; X86-NOBMI: # %bb.0: 2392; X86-NOBMI-NEXT: pushl %esi 2393; X86-NOBMI-NEXT: subl $8, %esp 2394; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2395; X86-NOBMI-NEXT: xorl %ecx, %ecx 2396; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2397; X86-NOBMI-NEXT: movl $-1, %edx 2398; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2399; X86-NOBMI-NEXT: shrl %cl, %edx 2400; X86-NOBMI-NEXT: movl (%eax), %esi 2401; X86-NOBMI-NEXT: andl %edx, %esi 2402; X86-NOBMI-NEXT: movl %edx, (%esp) 2403; X86-NOBMI-NEXT: calll use32 2404; X86-NOBMI-NEXT: movl %esi, %eax 2405; X86-NOBMI-NEXT: addl $8, %esp 2406; X86-NOBMI-NEXT: popl %esi 2407; X86-NOBMI-NEXT: retl 2408; 2409; X86-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: 2410; X86-BMI1NOTBM: # %bb.0: 2411; X86-BMI1NOTBM-NEXT: pushl %esi 2412; X86-BMI1NOTBM-NEXT: subl $8, %esp 2413; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax 2414; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx 2415; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2416; 
X86-BMI1NOTBM-NEXT: movl $-1, %edx 2417; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2418; X86-BMI1NOTBM-NEXT: shrl %cl, %edx 2419; X86-BMI1NOTBM-NEXT: movl (%eax), %esi 2420; X86-BMI1NOTBM-NEXT: andl %edx, %esi 2421; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) 2422; X86-BMI1NOTBM-NEXT: calll use32 2423; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2424; X86-BMI1NOTBM-NEXT: addl $8, %esp 2425; X86-BMI1NOTBM-NEXT: popl %esi 2426; X86-BMI1NOTBM-NEXT: retl 2427; 2428; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: 2429; X86-BMI1BMI2: # %bb.0: 2430; X86-BMI1BMI2-NEXT: pushl %esi 2431; X86-BMI1BMI2-NEXT: subl $8, %esp 2432; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2433; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 2434; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi 2435; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx 2436; X86-BMI1BMI2-NEXT: negb %cl 2437; X86-BMI1BMI2-NEXT: movl $-1, %eax 2438; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax 2439; X86-BMI1BMI2-NEXT: movl %eax, (%esp) 2440; X86-BMI1BMI2-NEXT: calll use32 2441; X86-BMI1BMI2-NEXT: movl %esi, %eax 2442; X86-BMI1BMI2-NEXT: addl $8, %esp 2443; X86-BMI1BMI2-NEXT: popl %esi 2444; X86-BMI1BMI2-NEXT: retl 2445; 2446; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2447; X64-NOBMI: # %bb.0: 2448; X64-NOBMI-NEXT: pushq %rbx 2449; X64-NOBMI-NEXT: movl %esi, %ecx 2450; X64-NOBMI-NEXT: negb %cl 2451; X64-NOBMI-NEXT: movl $-1, %eax 2452; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2453; X64-NOBMI-NEXT: shrl %cl, %eax 2454; X64-NOBMI-NEXT: movl (%rdi), %ebx 2455; X64-NOBMI-NEXT: andl %eax, %ebx 2456; X64-NOBMI-NEXT: movl %eax, %edi 2457; X64-NOBMI-NEXT: callq use32 2458; X64-NOBMI-NEXT: movl %ebx, %eax 2459; X64-NOBMI-NEXT: popq %rbx 2460; X64-NOBMI-NEXT: retq 2461; 2462; X64-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: 2463; X64-BMI1NOTBM: # %bb.0: 2464; X64-BMI1NOTBM-NEXT: pushq %rbx 2465; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2466; X64-BMI1NOTBM-NEXT: negb %cl 2467; X64-BMI1NOTBM-NEXT: movl $-1, 
%eax 2468; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2469; X64-BMI1NOTBM-NEXT: shrl %cl, %eax 2470; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx 2471; X64-BMI1NOTBM-NEXT: andl %eax, %ebx 2472; X64-BMI1NOTBM-NEXT: movl %eax, %edi 2473; X64-BMI1NOTBM-NEXT: callq use32 2474; X64-BMI1NOTBM-NEXT: movl %ebx, %eax 2475; X64-BMI1NOTBM-NEXT: popq %rbx 2476; X64-BMI1NOTBM-NEXT: retq 2477; 2478; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: 2479; X64-BMI1BMI2: # %bb.0: 2480; X64-BMI1BMI2-NEXT: pushq %rbx 2481; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx 2482; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2483; X64-BMI1BMI2-NEXT: negb %sil 2484; X64-BMI1BMI2-NEXT: movl $-1, %eax 2485; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi 2486; X64-BMI1BMI2-NEXT: callq use32 2487; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2488; X64-BMI1BMI2-NEXT: popq %rbx 2489; X64-BMI1BMI2-NEXT: retq 2490 %val = load i32, i32* %w 2491 %numhighbits = sub i8 32, %numlowbits 2492 %sh_prom = zext i8 %numhighbits to i32 2493 %mask = lshr i32 -1, %sh_prom 2494 call void @use32(i32 %mask) 2495 %masked = and i32 %mask, %val 2496 ret i32 %masked 2497} 2498 2499define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { 2500; X86-NOBMI-LABEL: bzhi32_c4_commutative: 2501; X86-NOBMI: # %bb.0: 2502; X86-NOBMI-NEXT: pushl %esi 2503; X86-NOBMI-NEXT: subl $8, %esp 2504; X86-NOBMI-NEXT: xorl %ecx, %ecx 2505; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2506; X86-NOBMI-NEXT: movl $-1, %esi 2507; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2508; X86-NOBMI-NEXT: shrl %cl, %esi 2509; X86-NOBMI-NEXT: movl %esi, (%esp) 2510; X86-NOBMI-NEXT: calll use32 2511; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 2512; X86-NOBMI-NEXT: movl %esi, %eax 2513; X86-NOBMI-NEXT: addl $8, %esp 2514; X86-NOBMI-NEXT: popl %esi 2515; X86-NOBMI-NEXT: retl 2516; 2517; X86-BMI1NOTBM-LABEL: bzhi32_c4_commutative: 2518; X86-BMI1NOTBM: # %bb.0: 2519; X86-BMI1NOTBM-NEXT: pushl %esi 2520; X86-BMI1NOTBM-NEXT: 
subl $8, %esp 2521; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx 2522; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2523; X86-BMI1NOTBM-NEXT: movl $-1, %esi 2524; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2525; X86-BMI1NOTBM-NEXT: shrl %cl, %esi 2526; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) 2527; X86-BMI1NOTBM-NEXT: calll use32 2528; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 2529; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2530; X86-BMI1NOTBM-NEXT: addl $8, %esp 2531; X86-BMI1NOTBM-NEXT: popl %esi 2532; X86-BMI1NOTBM-NEXT: retl 2533; 2534; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative: 2535; X86-BMI1BMI2: # %bb.0: 2536; X86-BMI1BMI2-NEXT: pushl %ebx 2537; X86-BMI1BMI2-NEXT: subl $8, %esp 2538; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 2539; X86-BMI1BMI2-NEXT: movl %ebx, %eax 2540; X86-BMI1BMI2-NEXT: negb %al 2541; X86-BMI1BMI2-NEXT: movl $-1, %ecx 2542; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax 2543; X86-BMI1BMI2-NEXT: movl %eax, (%esp) 2544; X86-BMI1BMI2-NEXT: calll use32 2545; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax 2546; X86-BMI1BMI2-NEXT: addl $8, %esp 2547; X86-BMI1BMI2-NEXT: popl %ebx 2548; X86-BMI1BMI2-NEXT: retl 2549; 2550; X64-NOBMI-LABEL: bzhi32_c4_commutative: 2551; X64-NOBMI: # %bb.0: 2552; X64-NOBMI-NEXT: pushq %rbp 2553; X64-NOBMI-NEXT: pushq %rbx 2554; X64-NOBMI-NEXT: pushq %rax 2555; X64-NOBMI-NEXT: movl %esi, %ecx 2556; X64-NOBMI-NEXT: movl %edi, %ebx 2557; X64-NOBMI-NEXT: negb %cl 2558; X64-NOBMI-NEXT: movl $-1, %ebp 2559; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2560; X64-NOBMI-NEXT: shrl %cl, %ebp 2561; X64-NOBMI-NEXT: movl %ebp, %edi 2562; X64-NOBMI-NEXT: callq use32 2563; X64-NOBMI-NEXT: andl %ebx, %ebp 2564; X64-NOBMI-NEXT: movl %ebp, %eax 2565; X64-NOBMI-NEXT: addq $8, %rsp 2566; X64-NOBMI-NEXT: popq %rbx 2567; X64-NOBMI-NEXT: popq %rbp 2568; X64-NOBMI-NEXT: retq 2569; 2570; X64-BMI1NOTBM-LABEL: bzhi32_c4_commutative: 2571; X64-BMI1NOTBM: # %bb.0: 2572; X64-BMI1NOTBM-NEXT: pushq %rbp 2573; 
X64-BMI1NOTBM-NEXT: pushq %rbx 2574; X64-BMI1NOTBM-NEXT: pushq %rax 2575; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2576; X64-BMI1NOTBM-NEXT: movl %edi, %ebx 2577; X64-BMI1NOTBM-NEXT: negb %cl 2578; X64-BMI1NOTBM-NEXT: movl $-1, %ebp 2579; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2580; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp 2581; X64-BMI1NOTBM-NEXT: movl %ebp, %edi 2582; X64-BMI1NOTBM-NEXT: callq use32 2583; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp 2584; X64-BMI1NOTBM-NEXT: movl %ebp, %eax 2585; X64-BMI1NOTBM-NEXT: addq $8, %rsp 2586; X64-BMI1NOTBM-NEXT: popq %rbx 2587; X64-BMI1NOTBM-NEXT: popq %rbp 2588; X64-BMI1NOTBM-NEXT: retq 2589; 2590; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative: 2591; X64-BMI1BMI2: # %bb.0: 2592; X64-BMI1BMI2-NEXT: pushq %rbp 2593; X64-BMI1BMI2-NEXT: pushq %rbx 2594; X64-BMI1BMI2-NEXT: pushq %rax 2595; X64-BMI1BMI2-NEXT: movl %esi, %ebx 2596; X64-BMI1BMI2-NEXT: movl %edi, %ebp 2597; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2598; X64-BMI1BMI2-NEXT: negb %al 2599; X64-BMI1BMI2-NEXT: movl $-1, %ecx 2600; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi 2601; X64-BMI1BMI2-NEXT: callq use32 2602; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax 2603; X64-BMI1BMI2-NEXT: addq $8, %rsp 2604; X64-BMI1BMI2-NEXT: popq %rbx 2605; X64-BMI1BMI2-NEXT: popq %rbp 2606; X64-BMI1BMI2-NEXT: retq 2607 %numhighbits = sub i32 32, %numlowbits 2608 %mask = lshr i32 -1, %numhighbits 2609 call void @use32(i32 %mask) 2610 %masked = and i32 %val, %mask ; swapped order 2611 ret i32 %masked 2612} 2613 2614; 64-bit 2615 2616declare void @use64(i64) 2617 2618define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { 2619; X86-NOBMI-LABEL: bzhi64_c0: 2620; X86-NOBMI: # %bb.0: 2621; X86-NOBMI-NEXT: pushl %edi 2622; X86-NOBMI-NEXT: pushl %esi 2623; X86-NOBMI-NEXT: pushl %eax 2624; X86-NOBMI-NEXT: movb $64, %cl 2625; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2626; X86-NOBMI-NEXT: movl $-1, %esi 2627; X86-NOBMI-NEXT: movl $-1, %edi 2628; X86-NOBMI-NEXT: shrl %cl, %edi 2629; X86-NOBMI-NEXT: testb 
$32, %cl 2630; X86-NOBMI-NEXT: je .LBB34_2 2631; X86-NOBMI-NEXT: # %bb.1: 2632; X86-NOBMI-NEXT: movl %edi, %esi 2633; X86-NOBMI-NEXT: xorl %edi, %edi 2634; X86-NOBMI-NEXT: .LBB34_2: 2635; X86-NOBMI-NEXT: subl $8, %esp 2636; X86-NOBMI-NEXT: pushl %edi 2637; X86-NOBMI-NEXT: pushl %esi 2638; X86-NOBMI-NEXT: calll use64 2639; X86-NOBMI-NEXT: addl $16, %esp 2640; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 2641; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi 2642; X86-NOBMI-NEXT: movl %esi, %eax 2643; X86-NOBMI-NEXT: movl %edi, %edx 2644; X86-NOBMI-NEXT: addl $4, %esp 2645; X86-NOBMI-NEXT: popl %esi 2646; X86-NOBMI-NEXT: popl %edi 2647; X86-NOBMI-NEXT: retl 2648; 2649; X86-BMI1NOTBM-LABEL: bzhi64_c0: 2650; X86-BMI1NOTBM: # %bb.0: 2651; X86-BMI1NOTBM-NEXT: pushl %edi 2652; X86-BMI1NOTBM-NEXT: pushl %esi 2653; X86-BMI1NOTBM-NEXT: pushl %eax 2654; X86-BMI1NOTBM-NEXT: movb $64, %cl 2655; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2656; X86-BMI1NOTBM-NEXT: movl $-1, %esi 2657; X86-BMI1NOTBM-NEXT: movl $-1, %edi 2658; X86-BMI1NOTBM-NEXT: shrl %cl, %edi 2659; X86-BMI1NOTBM-NEXT: testb $32, %cl 2660; X86-BMI1NOTBM-NEXT: je .LBB34_2 2661; X86-BMI1NOTBM-NEXT: # %bb.1: 2662; X86-BMI1NOTBM-NEXT: movl %edi, %esi 2663; X86-BMI1NOTBM-NEXT: xorl %edi, %edi 2664; X86-BMI1NOTBM-NEXT: .LBB34_2: 2665; X86-BMI1NOTBM-NEXT: subl $8, %esp 2666; X86-BMI1NOTBM-NEXT: pushl %edi 2667; X86-BMI1NOTBM-NEXT: pushl %esi 2668; X86-BMI1NOTBM-NEXT: calll use64 2669; X86-BMI1NOTBM-NEXT: addl $16, %esp 2670; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 2671; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi 2672; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2673; X86-BMI1NOTBM-NEXT: movl %edi, %edx 2674; X86-BMI1NOTBM-NEXT: addl $4, %esp 2675; X86-BMI1NOTBM-NEXT: popl %esi 2676; X86-BMI1NOTBM-NEXT: popl %edi 2677; X86-BMI1NOTBM-NEXT: retl 2678; 2679; X86-BMI1BMI2-LABEL: bzhi64_c0: 2680; X86-BMI1BMI2: # %bb.0: 2681; X86-BMI1BMI2-NEXT: pushl %edi 2682; X86-BMI1BMI2-NEXT: pushl %esi 2683; X86-BMI1BMI2-NEXT: pushl 
%eax 2684; X86-BMI1BMI2-NEXT: movb $64, %al 2685; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al 2686; X86-BMI1BMI2-NEXT: movl $-1, %edi 2687; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi 2688; X86-BMI1BMI2-NEXT: testb $32, %al 2689; X86-BMI1BMI2-NEXT: je .LBB34_2 2690; X86-BMI1BMI2-NEXT: # %bb.1: 2691; X86-BMI1BMI2-NEXT: movl %esi, %edi 2692; X86-BMI1BMI2-NEXT: xorl %esi, %esi 2693; X86-BMI1BMI2-NEXT: .LBB34_2: 2694; X86-BMI1BMI2-NEXT: subl $8, %esp 2695; X86-BMI1BMI2-NEXT: pushl %esi 2696; X86-BMI1BMI2-NEXT: pushl %edi 2697; X86-BMI1BMI2-NEXT: calll use64 2698; X86-BMI1BMI2-NEXT: addl $16, %esp 2699; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi 2700; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 2701; X86-BMI1BMI2-NEXT: movl %edi, %eax 2702; X86-BMI1BMI2-NEXT: movl %esi, %edx 2703; X86-BMI1BMI2-NEXT: addl $4, %esp 2704; X86-BMI1BMI2-NEXT: popl %esi 2705; X86-BMI1BMI2-NEXT: popl %edi 2706; X86-BMI1BMI2-NEXT: retl 2707; 2708; X64-NOBMI-LABEL: bzhi64_c0: 2709; X64-NOBMI: # %bb.0: 2710; X64-NOBMI-NEXT: pushq %r14 2711; X64-NOBMI-NEXT: pushq %rbx 2712; X64-NOBMI-NEXT: pushq %rax 2713; X64-NOBMI-NEXT: movq %rsi, %rcx 2714; X64-NOBMI-NEXT: movq %rdi, %r14 2715; X64-NOBMI-NEXT: negb %cl 2716; X64-NOBMI-NEXT: movq $-1, %rbx 2717; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2718; X64-NOBMI-NEXT: shrq %cl, %rbx 2719; X64-NOBMI-NEXT: movq %rbx, %rdi 2720; X64-NOBMI-NEXT: callq use64 2721; X64-NOBMI-NEXT: andq %r14, %rbx 2722; X64-NOBMI-NEXT: movq %rbx, %rax 2723; X64-NOBMI-NEXT: addq $8, %rsp 2724; X64-NOBMI-NEXT: popq %rbx 2725; X64-NOBMI-NEXT: popq %r14 2726; X64-NOBMI-NEXT: retq 2727; 2728; X64-BMI1NOTBM-LABEL: bzhi64_c0: 2729; X64-BMI1NOTBM: # %bb.0: 2730; X64-BMI1NOTBM-NEXT: pushq %r14 2731; X64-BMI1NOTBM-NEXT: pushq %rbx 2732; X64-BMI1NOTBM-NEXT: pushq %rax 2733; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx 2734; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 2735; X64-BMI1NOTBM-NEXT: negb %cl 2736; X64-BMI1NOTBM-NEXT: movq $-1, %rbx 2737; X64-BMI1NOTBM-NEXT: # kill: def $cl 
killed $cl killed $rcx 2738; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx 2739; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi 2740; X64-BMI1NOTBM-NEXT: callq use64 2741; X64-BMI1NOTBM-NEXT: andq %r14, %rbx 2742; X64-BMI1NOTBM-NEXT: movq %rbx, %rax 2743; X64-BMI1NOTBM-NEXT: addq $8, %rsp 2744; X64-BMI1NOTBM-NEXT: popq %rbx 2745; X64-BMI1NOTBM-NEXT: popq %r14 2746; X64-BMI1NOTBM-NEXT: retq 2747; 2748; X64-BMI1BMI2-LABEL: bzhi64_c0: 2749; X64-BMI1BMI2: # %bb.0: 2750; X64-BMI1BMI2-NEXT: pushq %r14 2751; X64-BMI1BMI2-NEXT: pushq %rbx 2752; X64-BMI1BMI2-NEXT: pushq %rax 2753; X64-BMI1BMI2-NEXT: movq %rsi, %rbx 2754; X64-BMI1BMI2-NEXT: movq %rdi, %r14 2755; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2756; X64-BMI1BMI2-NEXT: negb %al 2757; X64-BMI1BMI2-NEXT: movq $-1, %rcx 2758; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi 2759; X64-BMI1BMI2-NEXT: callq use64 2760; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax 2761; X64-BMI1BMI2-NEXT: addq $8, %rsp 2762; X64-BMI1BMI2-NEXT: popq %rbx 2763; X64-BMI1BMI2-NEXT: popq %r14 2764; X64-BMI1BMI2-NEXT: retq 2765 %numhighbits = sub i64 64, %numlowbits 2766 %mask = lshr i64 -1, %numhighbits 2767 call void @use64(i64 %mask) 2768 %masked = and i64 %mask, %val 2769 ret i64 %masked 2770} 2771 2772define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { 2773; X86-NOBMI-LABEL: bzhi64_c1_indexzext: 2774; X86-NOBMI: # %bb.0: 2775; X86-NOBMI-NEXT: pushl %edi 2776; X86-NOBMI-NEXT: pushl %esi 2777; X86-NOBMI-NEXT: pushl %eax 2778; X86-NOBMI-NEXT: movb $64, %cl 2779; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2780; X86-NOBMI-NEXT: movl $-1, %esi 2781; X86-NOBMI-NEXT: movl $-1, %edi 2782; X86-NOBMI-NEXT: shrl %cl, %edi 2783; X86-NOBMI-NEXT: testb $32, %cl 2784; X86-NOBMI-NEXT: je .LBB35_2 2785; X86-NOBMI-NEXT: # %bb.1: 2786; X86-NOBMI-NEXT: movl %edi, %esi 2787; X86-NOBMI-NEXT: xorl %edi, %edi 2788; X86-NOBMI-NEXT: .LBB35_2: 2789; X86-NOBMI-NEXT: subl $8, %esp 2790; X86-NOBMI-NEXT: pushl %edi 2791; X86-NOBMI-NEXT: pushl %esi 2792; X86-NOBMI-NEXT: calll use64 2793; 
X86-NOBMI-NEXT: addl $16, %esp 2794; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 2795; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi 2796; X86-NOBMI-NEXT: movl %esi, %eax 2797; X86-NOBMI-NEXT: movl %edi, %edx 2798; X86-NOBMI-NEXT: addl $4, %esp 2799; X86-NOBMI-NEXT: popl %esi 2800; X86-NOBMI-NEXT: popl %edi 2801; X86-NOBMI-NEXT: retl 2802; 2803; X86-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: 2804; X86-BMI1NOTBM: # %bb.0: 2805; X86-BMI1NOTBM-NEXT: pushl %edi 2806; X86-BMI1NOTBM-NEXT: pushl %esi 2807; X86-BMI1NOTBM-NEXT: pushl %eax 2808; X86-BMI1NOTBM-NEXT: movb $64, %cl 2809; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2810; X86-BMI1NOTBM-NEXT: movl $-1, %esi 2811; X86-BMI1NOTBM-NEXT: movl $-1, %edi 2812; X86-BMI1NOTBM-NEXT: shrl %cl, %edi 2813; X86-BMI1NOTBM-NEXT: testb $32, %cl 2814; X86-BMI1NOTBM-NEXT: je .LBB35_2 2815; X86-BMI1NOTBM-NEXT: # %bb.1: 2816; X86-BMI1NOTBM-NEXT: movl %edi, %esi 2817; X86-BMI1NOTBM-NEXT: xorl %edi, %edi 2818; X86-BMI1NOTBM-NEXT: .LBB35_2: 2819; X86-BMI1NOTBM-NEXT: subl $8, %esp 2820; X86-BMI1NOTBM-NEXT: pushl %edi 2821; X86-BMI1NOTBM-NEXT: pushl %esi 2822; X86-BMI1NOTBM-NEXT: calll use64 2823; X86-BMI1NOTBM-NEXT: addl $16, %esp 2824; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 2825; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi 2826; X86-BMI1NOTBM-NEXT: movl %esi, %eax 2827; X86-BMI1NOTBM-NEXT: movl %edi, %edx 2828; X86-BMI1NOTBM-NEXT: addl $4, %esp 2829; X86-BMI1NOTBM-NEXT: popl %esi 2830; X86-BMI1NOTBM-NEXT: popl %edi 2831; X86-BMI1NOTBM-NEXT: retl 2832; 2833; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext: 2834; X86-BMI1BMI2: # %bb.0: 2835; X86-BMI1BMI2-NEXT: pushl %edi 2836; X86-BMI1BMI2-NEXT: pushl %esi 2837; X86-BMI1BMI2-NEXT: pushl %eax 2838; X86-BMI1BMI2-NEXT: movb $64, %al 2839; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al 2840; X86-BMI1BMI2-NEXT: movl $-1, %edi 2841; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi 2842; X86-BMI1BMI2-NEXT: testb $32, %al 2843; X86-BMI1BMI2-NEXT: je .LBB35_2 2844; X86-BMI1BMI2-NEXT: # %bb.1: 2845; 
X86-BMI1BMI2-NEXT: movl %esi, %edi 2846; X86-BMI1BMI2-NEXT: xorl %esi, %esi 2847; X86-BMI1BMI2-NEXT: .LBB35_2: 2848; X86-BMI1BMI2-NEXT: subl $8, %esp 2849; X86-BMI1BMI2-NEXT: pushl %esi 2850; X86-BMI1BMI2-NEXT: pushl %edi 2851; X86-BMI1BMI2-NEXT: calll use64 2852; X86-BMI1BMI2-NEXT: addl $16, %esp 2853; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi 2854; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 2855; X86-BMI1BMI2-NEXT: movl %edi, %eax 2856; X86-BMI1BMI2-NEXT: movl %esi, %edx 2857; X86-BMI1BMI2-NEXT: addl $4, %esp 2858; X86-BMI1BMI2-NEXT: popl %esi 2859; X86-BMI1BMI2-NEXT: popl %edi 2860; X86-BMI1BMI2-NEXT: retl 2861; 2862; X64-NOBMI-LABEL: bzhi64_c1_indexzext: 2863; X64-NOBMI: # %bb.0: 2864; X64-NOBMI-NEXT: pushq %r14 2865; X64-NOBMI-NEXT: pushq %rbx 2866; X64-NOBMI-NEXT: pushq %rax 2867; X64-NOBMI-NEXT: movl %esi, %ecx 2868; X64-NOBMI-NEXT: movq %rdi, %r14 2869; X64-NOBMI-NEXT: negb %cl 2870; X64-NOBMI-NEXT: movq $-1, %rbx 2871; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2872; X64-NOBMI-NEXT: shrq %cl, %rbx 2873; X64-NOBMI-NEXT: movq %rbx, %rdi 2874; X64-NOBMI-NEXT: callq use64 2875; X64-NOBMI-NEXT: andq %r14, %rbx 2876; X64-NOBMI-NEXT: movq %rbx, %rax 2877; X64-NOBMI-NEXT: addq $8, %rsp 2878; X64-NOBMI-NEXT: popq %rbx 2879; X64-NOBMI-NEXT: popq %r14 2880; X64-NOBMI-NEXT: retq 2881; 2882; X64-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: 2883; X64-BMI1NOTBM: # %bb.0: 2884; X64-BMI1NOTBM-NEXT: pushq %r14 2885; X64-BMI1NOTBM-NEXT: pushq %rbx 2886; X64-BMI1NOTBM-NEXT: pushq %rax 2887; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 2888; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 2889; X64-BMI1NOTBM-NEXT: negb %cl 2890; X64-BMI1NOTBM-NEXT: movq $-1, %rbx 2891; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 2892; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx 2893; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi 2894; X64-BMI1NOTBM-NEXT: callq use64 2895; X64-BMI1NOTBM-NEXT: andq %r14, %rbx 2896; X64-BMI1NOTBM-NEXT: movq %rbx, %rax 2897; X64-BMI1NOTBM-NEXT: addq $8, %rsp 2898; 
X64-BMI1NOTBM-NEXT: popq %rbx 2899; X64-BMI1NOTBM-NEXT: popq %r14 2900; X64-BMI1NOTBM-NEXT: retq 2901; 2902; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext: 2903; X64-BMI1BMI2: # %bb.0: 2904; X64-BMI1BMI2-NEXT: pushq %r14 2905; X64-BMI1BMI2-NEXT: pushq %rbx 2906; X64-BMI1BMI2-NEXT: pushq %rax 2907; X64-BMI1BMI2-NEXT: movl %esi, %ebx 2908; X64-BMI1BMI2-NEXT: movq %rdi, %r14 2909; X64-BMI1BMI2-NEXT: movl %ebx, %eax 2910; X64-BMI1BMI2-NEXT: negb %al 2911; X64-BMI1BMI2-NEXT: movq $-1, %rcx 2912; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi 2913; X64-BMI1BMI2-NEXT: callq use64 2914; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax 2915; X64-BMI1BMI2-NEXT: addq $8, %rsp 2916; X64-BMI1BMI2-NEXT: popq %rbx 2917; X64-BMI1BMI2-NEXT: popq %r14 2918; X64-BMI1BMI2-NEXT: retq 2919 %numhighbits = sub i8 64, %numlowbits 2920 %sh_prom = zext i8 %numhighbits to i64 2921 %mask = lshr i64 -1, %sh_prom 2922 call void @use64(i64 %mask) 2923 %masked = and i64 %mask, %val 2924 ret i64 %masked 2925} 2926 2927define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { 2928; X86-NOBMI-LABEL: bzhi64_c2_load: 2929; X86-NOBMI: # %bb.0: 2930; X86-NOBMI-NEXT: pushl %ebx 2931; X86-NOBMI-NEXT: pushl %edi 2932; X86-NOBMI-NEXT: pushl %esi 2933; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2934; X86-NOBMI-NEXT: movb $64, %cl 2935; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2936; X86-NOBMI-NEXT: movl $-1, %eax 2937; X86-NOBMI-NEXT: movl $-1, %ebx 2938; X86-NOBMI-NEXT: shrl %cl, %ebx 2939; X86-NOBMI-NEXT: testb $32, %cl 2940; X86-NOBMI-NEXT: je .LBB36_2 2941; X86-NOBMI-NEXT: # %bb.1: 2942; X86-NOBMI-NEXT: movl %ebx, %eax 2943; X86-NOBMI-NEXT: xorl %ebx, %ebx 2944; X86-NOBMI-NEXT: .LBB36_2: 2945; X86-NOBMI-NEXT: movl 4(%edx), %esi 2946; X86-NOBMI-NEXT: andl %ebx, %esi 2947; X86-NOBMI-NEXT: movl (%edx), %edi 2948; X86-NOBMI-NEXT: andl %eax, %edi 2949; X86-NOBMI-NEXT: subl $8, %esp 2950; X86-NOBMI-NEXT: pushl %ebx 2951; X86-NOBMI-NEXT: pushl %eax 2952; X86-NOBMI-NEXT: calll use64 2953; X86-NOBMI-NEXT: addl $16, %esp 
2954; X86-NOBMI-NEXT: movl %edi, %eax 2955; X86-NOBMI-NEXT: movl %esi, %edx 2956; X86-NOBMI-NEXT: popl %esi 2957; X86-NOBMI-NEXT: popl %edi 2958; X86-NOBMI-NEXT: popl %ebx 2959; X86-NOBMI-NEXT: retl 2960; 2961; X86-BMI1NOTBM-LABEL: bzhi64_c2_load: 2962; X86-BMI1NOTBM: # %bb.0: 2963; X86-BMI1NOTBM-NEXT: pushl %ebx 2964; X86-BMI1NOTBM-NEXT: pushl %edi 2965; X86-BMI1NOTBM-NEXT: pushl %esi 2966; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx 2967; X86-BMI1NOTBM-NEXT: movb $64, %cl 2968; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 2969; X86-BMI1NOTBM-NEXT: movl $-1, %eax 2970; X86-BMI1NOTBM-NEXT: movl $-1, %ebx 2971; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx 2972; X86-BMI1NOTBM-NEXT: testb $32, %cl 2973; X86-BMI1NOTBM-NEXT: je .LBB36_2 2974; X86-BMI1NOTBM-NEXT: # %bb.1: 2975; X86-BMI1NOTBM-NEXT: movl %ebx, %eax 2976; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx 2977; X86-BMI1NOTBM-NEXT: .LBB36_2: 2978; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi 2979; X86-BMI1NOTBM-NEXT: andl %ebx, %esi 2980; X86-BMI1NOTBM-NEXT: movl (%edx), %edi 2981; X86-BMI1NOTBM-NEXT: andl %eax, %edi 2982; X86-BMI1NOTBM-NEXT: subl $8, %esp 2983; X86-BMI1NOTBM-NEXT: pushl %ebx 2984; X86-BMI1NOTBM-NEXT: pushl %eax 2985; X86-BMI1NOTBM-NEXT: calll use64 2986; X86-BMI1NOTBM-NEXT: addl $16, %esp 2987; X86-BMI1NOTBM-NEXT: movl %edi, %eax 2988; X86-BMI1NOTBM-NEXT: movl %esi, %edx 2989; X86-BMI1NOTBM-NEXT: popl %esi 2990; X86-BMI1NOTBM-NEXT: popl %edi 2991; X86-BMI1NOTBM-NEXT: popl %ebx 2992; X86-BMI1NOTBM-NEXT: retl 2993; 2994; X86-BMI1BMI2-LABEL: bzhi64_c2_load: 2995; X86-BMI1BMI2: # %bb.0: 2996; X86-BMI1BMI2-NEXT: pushl %ebx 2997; X86-BMI1BMI2-NEXT: pushl %edi 2998; X86-BMI1BMI2-NEXT: pushl %esi 2999; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3000; X86-BMI1BMI2-NEXT: movb $64, %bl 3001; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 3002; X86-BMI1BMI2-NEXT: movl $-1, %ecx 3003; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx 3004; X86-BMI1BMI2-NEXT: testb $32, %bl 3005; X86-BMI1BMI2-NEXT: je .LBB36_2 3006; 
X86-BMI1BMI2-NEXT: # %bb.1: 3007; X86-BMI1BMI2-NEXT: movl %edx, %ecx 3008; X86-BMI1BMI2-NEXT: xorl %edx, %edx 3009; X86-BMI1BMI2-NEXT: .LBB36_2: 3010; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi 3011; X86-BMI1BMI2-NEXT: andl %edx, %esi 3012; X86-BMI1BMI2-NEXT: movl (%eax), %edi 3013; X86-BMI1BMI2-NEXT: andl %ecx, %edi 3014; X86-BMI1BMI2-NEXT: subl $8, %esp 3015; X86-BMI1BMI2-NEXT: pushl %edx 3016; X86-BMI1BMI2-NEXT: pushl %ecx 3017; X86-BMI1BMI2-NEXT: calll use64 3018; X86-BMI1BMI2-NEXT: addl $16, %esp 3019; X86-BMI1BMI2-NEXT: movl %edi, %eax 3020; X86-BMI1BMI2-NEXT: movl %esi, %edx 3021; X86-BMI1BMI2-NEXT: popl %esi 3022; X86-BMI1BMI2-NEXT: popl %edi 3023; X86-BMI1BMI2-NEXT: popl %ebx 3024; X86-BMI1BMI2-NEXT: retl 3025; 3026; X64-NOBMI-LABEL: bzhi64_c2_load: 3027; X64-NOBMI: # %bb.0: 3028; X64-NOBMI-NEXT: pushq %rbx 3029; X64-NOBMI-NEXT: movq %rsi, %rcx 3030; X64-NOBMI-NEXT: negb %cl 3031; X64-NOBMI-NEXT: movq $-1, %rax 3032; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3033; X64-NOBMI-NEXT: shrq %cl, %rax 3034; X64-NOBMI-NEXT: movq (%rdi), %rbx 3035; X64-NOBMI-NEXT: andq %rax, %rbx 3036; X64-NOBMI-NEXT: movq %rax, %rdi 3037; X64-NOBMI-NEXT: callq use64 3038; X64-NOBMI-NEXT: movq %rbx, %rax 3039; X64-NOBMI-NEXT: popq %rbx 3040; X64-NOBMI-NEXT: retq 3041; 3042; X64-BMI1NOTBM-LABEL: bzhi64_c2_load: 3043; X64-BMI1NOTBM: # %bb.0: 3044; X64-BMI1NOTBM-NEXT: pushq %rbx 3045; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx 3046; X64-BMI1NOTBM-NEXT: negb %cl 3047; X64-BMI1NOTBM-NEXT: movq $-1, %rax 3048; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx 3049; X64-BMI1NOTBM-NEXT: shrq %cl, %rax 3050; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx 3051; X64-BMI1NOTBM-NEXT: andq %rax, %rbx 3052; X64-BMI1NOTBM-NEXT: movq %rax, %rdi 3053; X64-BMI1NOTBM-NEXT: callq use64 3054; X64-BMI1NOTBM-NEXT: movq %rbx, %rax 3055; X64-BMI1NOTBM-NEXT: popq %rbx 3056; X64-BMI1NOTBM-NEXT: retq 3057; 3058; X64-BMI1BMI2-LABEL: bzhi64_c2_load: 3059; X64-BMI1BMI2: # %bb.0: 3060; X64-BMI1BMI2-NEXT: pushq 
%rbx 3061; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx 3062; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 3063; X64-BMI1BMI2-NEXT: negb %sil 3064; X64-BMI1BMI2-NEXT: movq $-1, %rax 3065; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi 3066; X64-BMI1BMI2-NEXT: callq use64 3067; X64-BMI1BMI2-NEXT: movq %rbx, %rax 3068; X64-BMI1BMI2-NEXT: popq %rbx 3069; X64-BMI1BMI2-NEXT: retq 3070 %val = load i64, i64* %w 3071 %numhighbits = sub i64 64, %numlowbits 3072 %mask = lshr i64 -1, %numhighbits 3073 call void @use64(i64 %mask) 3074 %masked = and i64 %mask, %val 3075 ret i64 %masked 3076} 3077 3078define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { 3079; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext: 3080; X86-NOBMI: # %bb.0: 3081; X86-NOBMI-NEXT: pushl %ebx 3082; X86-NOBMI-NEXT: pushl %edi 3083; X86-NOBMI-NEXT: pushl %esi 3084; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 3085; X86-NOBMI-NEXT: movb $64, %cl 3086; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3087; X86-NOBMI-NEXT: movl $-1, %eax 3088; X86-NOBMI-NEXT: movl $-1, %ebx 3089; X86-NOBMI-NEXT: shrl %cl, %ebx 3090; X86-NOBMI-NEXT: testb $32, %cl 3091; X86-NOBMI-NEXT: je .LBB37_2 3092; X86-NOBMI-NEXT: # %bb.1: 3093; X86-NOBMI-NEXT: movl %ebx, %eax 3094; X86-NOBMI-NEXT: xorl %ebx, %ebx 3095; X86-NOBMI-NEXT: .LBB37_2: 3096; X86-NOBMI-NEXT: movl 4(%edx), %esi 3097; X86-NOBMI-NEXT: andl %ebx, %esi 3098; X86-NOBMI-NEXT: movl (%edx), %edi 3099; X86-NOBMI-NEXT: andl %eax, %edi 3100; X86-NOBMI-NEXT: subl $8, %esp 3101; X86-NOBMI-NEXT: pushl %ebx 3102; X86-NOBMI-NEXT: pushl %eax 3103; X86-NOBMI-NEXT: calll use64 3104; X86-NOBMI-NEXT: addl $16, %esp 3105; X86-NOBMI-NEXT: movl %edi, %eax 3106; X86-NOBMI-NEXT: movl %esi, %edx 3107; X86-NOBMI-NEXT: popl %esi 3108; X86-NOBMI-NEXT: popl %edi 3109; X86-NOBMI-NEXT: popl %ebx 3110; X86-NOBMI-NEXT: retl 3111; 3112; X86-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: 3113; X86-BMI1NOTBM: # %bb.0: 3114; X86-BMI1NOTBM-NEXT: pushl %ebx 3115; X86-BMI1NOTBM-NEXT: 
pushl %edi 3116; X86-BMI1NOTBM-NEXT: pushl %esi 3117; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx 3118; X86-BMI1NOTBM-NEXT: movb $64, %cl 3119; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 3120; X86-BMI1NOTBM-NEXT: movl $-1, %eax 3121; X86-BMI1NOTBM-NEXT: movl $-1, %ebx 3122; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx 3123; X86-BMI1NOTBM-NEXT: testb $32, %cl 3124; X86-BMI1NOTBM-NEXT: je .LBB37_2 3125; X86-BMI1NOTBM-NEXT: # %bb.1: 3126; X86-BMI1NOTBM-NEXT: movl %ebx, %eax 3127; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx 3128; X86-BMI1NOTBM-NEXT: .LBB37_2: 3129; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi 3130; X86-BMI1NOTBM-NEXT: andl %ebx, %esi 3131; X86-BMI1NOTBM-NEXT: movl (%edx), %edi 3132; X86-BMI1NOTBM-NEXT: andl %eax, %edi 3133; X86-BMI1NOTBM-NEXT: subl $8, %esp 3134; X86-BMI1NOTBM-NEXT: pushl %ebx 3135; X86-BMI1NOTBM-NEXT: pushl %eax 3136; X86-BMI1NOTBM-NEXT: calll use64 3137; X86-BMI1NOTBM-NEXT: addl $16, %esp 3138; X86-BMI1NOTBM-NEXT: movl %edi, %eax 3139; X86-BMI1NOTBM-NEXT: movl %esi, %edx 3140; X86-BMI1NOTBM-NEXT: popl %esi 3141; X86-BMI1NOTBM-NEXT: popl %edi 3142; X86-BMI1NOTBM-NEXT: popl %ebx 3143; X86-BMI1NOTBM-NEXT: retl 3144; 3145; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: 3146; X86-BMI1BMI2: # %bb.0: 3147; X86-BMI1BMI2-NEXT: pushl %ebx 3148; X86-BMI1BMI2-NEXT: pushl %edi 3149; X86-BMI1BMI2-NEXT: pushl %esi 3150; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3151; X86-BMI1BMI2-NEXT: movb $64, %bl 3152; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 3153; X86-BMI1BMI2-NEXT: movl $-1, %ecx 3154; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx 3155; X86-BMI1BMI2-NEXT: testb $32, %bl 3156; X86-BMI1BMI2-NEXT: je .LBB37_2 3157; X86-BMI1BMI2-NEXT: # %bb.1: 3158; X86-BMI1BMI2-NEXT: movl %edx, %ecx 3159; X86-BMI1BMI2-NEXT: xorl %edx, %edx 3160; X86-BMI1BMI2-NEXT: .LBB37_2: 3161; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi 3162; X86-BMI1BMI2-NEXT: andl %edx, %esi 3163; X86-BMI1BMI2-NEXT: movl (%eax), %edi 3164; X86-BMI1BMI2-NEXT: andl %ecx, %edi 3165; X86-BMI1BMI2-NEXT: subl $8, 
%esp 3166; X86-BMI1BMI2-NEXT: pushl %edx 3167; X86-BMI1BMI2-NEXT: pushl %ecx 3168; X86-BMI1BMI2-NEXT: calll use64 3169; X86-BMI1BMI2-NEXT: addl $16, %esp 3170; X86-BMI1BMI2-NEXT: movl %edi, %eax 3171; X86-BMI1BMI2-NEXT: movl %esi, %edx 3172; X86-BMI1BMI2-NEXT: popl %esi 3173; X86-BMI1BMI2-NEXT: popl %edi 3174; X86-BMI1BMI2-NEXT: popl %ebx 3175; X86-BMI1BMI2-NEXT: retl 3176; 3177; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: 3178; X64-NOBMI: # %bb.0: 3179; X64-NOBMI-NEXT: pushq %rbx 3180; X64-NOBMI-NEXT: movl %esi, %ecx 3181; X64-NOBMI-NEXT: negb %cl 3182; X64-NOBMI-NEXT: movq $-1, %rax 3183; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3184; X64-NOBMI-NEXT: shrq %cl, %rax 3185; X64-NOBMI-NEXT: movq (%rdi), %rbx 3186; X64-NOBMI-NEXT: andq %rax, %rbx 3187; X64-NOBMI-NEXT: movq %rax, %rdi 3188; X64-NOBMI-NEXT: callq use64 3189; X64-NOBMI-NEXT: movq %rbx, %rax 3190; X64-NOBMI-NEXT: popq %rbx 3191; X64-NOBMI-NEXT: retq 3192; 3193; X64-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: 3194; X64-BMI1NOTBM: # %bb.0: 3195; X64-BMI1NOTBM-NEXT: pushq %rbx 3196; X64-BMI1NOTBM-NEXT: movl %esi, %ecx 3197; X64-BMI1NOTBM-NEXT: negb %cl 3198; X64-BMI1NOTBM-NEXT: movq $-1, %rax 3199; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx 3200; X64-BMI1NOTBM-NEXT: shrq %cl, %rax 3201; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx 3202; X64-BMI1NOTBM-NEXT: andq %rax, %rbx 3203; X64-BMI1NOTBM-NEXT: movq %rax, %rdi 3204; X64-BMI1NOTBM-NEXT: callq use64 3205; X64-BMI1NOTBM-NEXT: movq %rbx, %rax 3206; X64-BMI1NOTBM-NEXT: popq %rbx 3207; X64-BMI1NOTBM-NEXT: retq 3208; 3209; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: 3210; X64-BMI1BMI2: # %bb.0: 3211; X64-BMI1BMI2-NEXT: pushq %rbx 3212; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 3213; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx 3214; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 3215; X64-BMI1BMI2-NEXT: negb %sil 3216; X64-BMI1BMI2-NEXT: movq $-1, %rax 3217; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi 
3218; X64-BMI1BMI2-NEXT: callq use64 3219; X64-BMI1BMI2-NEXT: movq %rbx, %rax 3220; X64-BMI1BMI2-NEXT: popq %rbx 3221; X64-BMI1BMI2-NEXT: retq 3222 %val = load i64, i64* %w 3223 %numhighbits = sub i8 64, %numlowbits 3224 %sh_prom = zext i8 %numhighbits to i64 3225 %mask = lshr i64 -1, %sh_prom 3226 call void @use64(i64 %mask) 3227 %masked = and i64 %mask, %val 3228 ret i64 %masked 3229} 3230 3231define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { 3232; X86-NOBMI-LABEL: bzhi64_c4_commutative: 3233; X86-NOBMI: # %bb.0: 3234; X86-NOBMI-NEXT: pushl %edi 3235; X86-NOBMI-NEXT: pushl %esi 3236; X86-NOBMI-NEXT: pushl %eax 3237; X86-NOBMI-NEXT: movb $64, %cl 3238; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3239; X86-NOBMI-NEXT: movl $-1, %esi 3240; X86-NOBMI-NEXT: movl $-1, %edi 3241; X86-NOBMI-NEXT: shrl %cl, %edi 3242; X86-NOBMI-NEXT: testb $32, %cl 3243; X86-NOBMI-NEXT: je .LBB38_2 3244; X86-NOBMI-NEXT: # %bb.1: 3245; X86-NOBMI-NEXT: movl %edi, %esi 3246; X86-NOBMI-NEXT: xorl %edi, %edi 3247; X86-NOBMI-NEXT: .LBB38_2: 3248; X86-NOBMI-NEXT: subl $8, %esp 3249; X86-NOBMI-NEXT: pushl %edi 3250; X86-NOBMI-NEXT: pushl %esi 3251; X86-NOBMI-NEXT: calll use64 3252; X86-NOBMI-NEXT: addl $16, %esp 3253; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi 3254; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi 3255; X86-NOBMI-NEXT: movl %esi, %eax 3256; X86-NOBMI-NEXT: movl %edi, %edx 3257; X86-NOBMI-NEXT: addl $4, %esp 3258; X86-NOBMI-NEXT: popl %esi 3259; X86-NOBMI-NEXT: popl %edi 3260; X86-NOBMI-NEXT: retl 3261; 3262; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative: 3263; X86-BMI1NOTBM: # %bb.0: 3264; X86-BMI1NOTBM-NEXT: pushl %edi 3265; X86-BMI1NOTBM-NEXT: pushl %esi 3266; X86-BMI1NOTBM-NEXT: pushl %eax 3267; X86-BMI1NOTBM-NEXT: movb $64, %cl 3268; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 3269; X86-BMI1NOTBM-NEXT: movl $-1, %esi 3270; X86-BMI1NOTBM-NEXT: movl $-1, %edi 3271; X86-BMI1NOTBM-NEXT: shrl %cl, %edi 3272; X86-BMI1NOTBM-NEXT: testb $32, %cl 3273; 
X86-BMI1NOTBM-NEXT: je .LBB38_2 3274; X86-BMI1NOTBM-NEXT: # %bb.1: 3275; X86-BMI1NOTBM-NEXT: movl %edi, %esi 3276; X86-BMI1NOTBM-NEXT: xorl %edi, %edi 3277; X86-BMI1NOTBM-NEXT: .LBB38_2: 3278; X86-BMI1NOTBM-NEXT: subl $8, %esp 3279; X86-BMI1NOTBM-NEXT: pushl %edi 3280; X86-BMI1NOTBM-NEXT: pushl %esi 3281; X86-BMI1NOTBM-NEXT: calll use64 3282; X86-BMI1NOTBM-NEXT: addl $16, %esp 3283; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi 3284; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi 3285; X86-BMI1NOTBM-NEXT: movl %esi, %eax 3286; X86-BMI1NOTBM-NEXT: movl %edi, %edx 3287; X86-BMI1NOTBM-NEXT: addl $4, %esp 3288; X86-BMI1NOTBM-NEXT: popl %esi 3289; X86-BMI1NOTBM-NEXT: popl %edi 3290; X86-BMI1NOTBM-NEXT: retl 3291; 3292; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative: 3293; X86-BMI1BMI2: # %bb.0: 3294; X86-BMI1BMI2-NEXT: pushl %edi 3295; X86-BMI1BMI2-NEXT: pushl %esi 3296; X86-BMI1BMI2-NEXT: pushl %eax 3297; X86-BMI1BMI2-NEXT: movb $64, %al 3298; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al 3299; X86-BMI1BMI2-NEXT: movl $-1, %edi 3300; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi 3301; X86-BMI1BMI2-NEXT: testb $32, %al 3302; X86-BMI1BMI2-NEXT: je .LBB38_2 3303; X86-BMI1BMI2-NEXT: # %bb.1: 3304; X86-BMI1BMI2-NEXT: movl %esi, %edi 3305; X86-BMI1BMI2-NEXT: xorl %esi, %esi 3306; X86-BMI1BMI2-NEXT: .LBB38_2: 3307; X86-BMI1BMI2-NEXT: subl $8, %esp 3308; X86-BMI1BMI2-NEXT: pushl %esi 3309; X86-BMI1BMI2-NEXT: pushl %edi 3310; X86-BMI1BMI2-NEXT: calll use64 3311; X86-BMI1BMI2-NEXT: addl $16, %esp 3312; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi 3313; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi 3314; X86-BMI1BMI2-NEXT: movl %edi, %eax 3315; X86-BMI1BMI2-NEXT: movl %esi, %edx 3316; X86-BMI1BMI2-NEXT: addl $4, %esp 3317; X86-BMI1BMI2-NEXT: popl %esi 3318; X86-BMI1BMI2-NEXT: popl %edi 3319; X86-BMI1BMI2-NEXT: retl 3320; 3321; X64-NOBMI-LABEL: bzhi64_c4_commutative: 3322; X64-NOBMI: # %bb.0: 3323; X64-NOBMI-NEXT: pushq %r14 3324; X64-NOBMI-NEXT: pushq %rbx 3325; X64-NOBMI-NEXT: 
pushq %rax 3326; X64-NOBMI-NEXT: movq %rsi, %rcx 3327; X64-NOBMI-NEXT: movq %rdi, %r14 3328; X64-NOBMI-NEXT: negb %cl 3329; X64-NOBMI-NEXT: movq $-1, %rbx 3330; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3331; X64-NOBMI-NEXT: shrq %cl, %rbx 3332; X64-NOBMI-NEXT: movq %rbx, %rdi 3333; X64-NOBMI-NEXT: callq use64 3334; X64-NOBMI-NEXT: andq %r14, %rbx 3335; X64-NOBMI-NEXT: movq %rbx, %rax 3336; X64-NOBMI-NEXT: addq $8, %rsp 3337; X64-NOBMI-NEXT: popq %rbx 3338; X64-NOBMI-NEXT: popq %r14 3339; X64-NOBMI-NEXT: retq 3340; 3341; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative: 3342; X64-BMI1NOTBM: # %bb.0: 3343; X64-BMI1NOTBM-NEXT: pushq %r14 3344; X64-BMI1NOTBM-NEXT: pushq %rbx 3345; X64-BMI1NOTBM-NEXT: pushq %rax 3346; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx 3347; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 3348; X64-BMI1NOTBM-NEXT: negb %cl 3349; X64-BMI1NOTBM-NEXT: movq $-1, %rbx 3350; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx 3351; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx 3352; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi 3353; X64-BMI1NOTBM-NEXT: callq use64 3354; X64-BMI1NOTBM-NEXT: andq %r14, %rbx 3355; X64-BMI1NOTBM-NEXT: movq %rbx, %rax 3356; X64-BMI1NOTBM-NEXT: addq $8, %rsp 3357; X64-BMI1NOTBM-NEXT: popq %rbx 3358; X64-BMI1NOTBM-NEXT: popq %r14 3359; X64-BMI1NOTBM-NEXT: retq 3360; 3361; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative: 3362; X64-BMI1BMI2: # %bb.0: 3363; X64-BMI1BMI2-NEXT: pushq %r14 3364; X64-BMI1BMI2-NEXT: pushq %rbx 3365; X64-BMI1BMI2-NEXT: pushq %rax 3366; X64-BMI1BMI2-NEXT: movq %rsi, %rbx 3367; X64-BMI1BMI2-NEXT: movq %rdi, %r14 3368; X64-BMI1BMI2-NEXT: movl %ebx, %eax 3369; X64-BMI1BMI2-NEXT: negb %al 3370; X64-BMI1BMI2-NEXT: movq $-1, %rcx 3371; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi 3372; X64-BMI1BMI2-NEXT: callq use64 3373; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax 3374; X64-BMI1BMI2-NEXT: addq $8, %rsp 3375; X64-BMI1BMI2-NEXT: popq %rbx 3376; X64-BMI1BMI2-NEXT: popq %r14 3377; X64-BMI1BMI2-NEXT: retq 3378 %numhighbits = sub i64 64, 
%numlowbits 3379 %mask = lshr i64 -1, %numhighbits 3380 call void @use64(i64 %mask) 3381 %masked = and i64 %val, %mask ; swapped order 3382 ret i64 %masked 3383} 3384 3385; 64-bit, but with 32-bit output 3386 3387; Everything done in 64-bit, truncation happens last. 3388define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { 3389; X86-NOBMI-LABEL: bzhi64_32_c0: 3390; X86-NOBMI: # %bb.0: 3391; X86-NOBMI-NEXT: movb $64, %cl 3392; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3393; X86-NOBMI-NEXT: movl $-1, %eax 3394; X86-NOBMI-NEXT: shrl %cl, %eax 3395; X86-NOBMI-NEXT: testb $32, %cl 3396; X86-NOBMI-NEXT: jne .LBB39_2 3397; X86-NOBMI-NEXT: # %bb.1: 3398; X86-NOBMI-NEXT: movl $-1, %eax 3399; X86-NOBMI-NEXT: .LBB39_2: 3400; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 3401; X86-NOBMI-NEXT: retl 3402; 3403; X86-BMI1NOTBM-LABEL: bzhi64_32_c0: 3404; X86-BMI1NOTBM: # %bb.0: 3405; X86-BMI1NOTBM-NEXT: movb $64, %cl 3406; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl 3407; X86-BMI1NOTBM-NEXT: movl $-1, %eax 3408; X86-BMI1NOTBM-NEXT: shrl %cl, %eax 3409; X86-BMI1NOTBM-NEXT: testb $32, %cl 3410; X86-BMI1NOTBM-NEXT: jne .LBB39_2 3411; X86-BMI1NOTBM-NEXT: # %bb.1: 3412; X86-BMI1NOTBM-NEXT: movl $-1, %eax 3413; X86-BMI1NOTBM-NEXT: .LBB39_2: 3414; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax 3415; X86-BMI1NOTBM-NEXT: retl 3416; 3417; X86-BMI1BMI2-LABEL: bzhi64_32_c0: 3418; X86-BMI1BMI2: # %bb.0: 3419; X86-BMI1BMI2-NEXT: movb $64, %cl 3420; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3421; X86-BMI1BMI2-NEXT: movl $-1, %eax 3422; X86-BMI1BMI2-NEXT: testb $32, %cl 3423; X86-BMI1BMI2-NEXT: je .LBB39_2 3424; X86-BMI1BMI2-NEXT: # %bb.1: 3425; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax 3426; X86-BMI1BMI2-NEXT: .LBB39_2: 3427; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 3428; X86-BMI1BMI2-NEXT: retl 3429; 3430; X64-NOBMI-LABEL: bzhi64_32_c0: 3431; X64-NOBMI: # %bb.0: 3432; X64-NOBMI-NEXT: movq %rsi, %rcx 3433; X64-NOBMI-NEXT: negb %cl 3434; X64-NOBMI-NEXT: movq $-1, %rax 3435; 
X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3436; X64-NOBMI-NEXT: shrq %cl, %rax 3437; X64-NOBMI-NEXT: andl %edi, %eax 3438; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3439; X64-NOBMI-NEXT: retq 3440; 3441; X64-BMI1NOTBM-LABEL: bzhi64_32_c0: 3442; X64-BMI1NOTBM: # %bb.0: 3443; X64-BMI1NOTBM-NEXT: shll $8, %esi 3444; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 3445; X64-BMI1NOTBM-NEXT: retq 3446; 3447; X64-BMI1BMI2-LABEL: bzhi64_32_c0: 3448; X64-BMI1BMI2: # %bb.0: 3449; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 3450; X64-BMI1BMI2-NEXT: retq 3451 %numhighbits = sub i64 64, %numlowbits 3452 %mask = lshr i64 -1, %numhighbits 3453 %masked = and i64 %mask, %val 3454 %res = trunc i64 %masked to i32 3455 ret i32 %res 3456} 3457 3458; Truncation happens first; shifting and masking are both 32-bit. 3459define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { 3460; X86-NOBMI-LABEL: bzhi64_32_c1: 3461; X86-NOBMI: # %bb.0: 3462; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3463; X86-NOBMI-NEXT: xorl %ecx, %ecx 3464; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3465; X86-NOBMI-NEXT: shll %cl, %eax 3466; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3467; X86-NOBMI-NEXT: shrl %cl, %eax 3468; X86-NOBMI-NEXT: retl 3469; 3470; X86-BMI1NOTBM-LABEL: bzhi64_32_c1: 3471; X86-BMI1NOTBM: # %bb.0: 3472; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 3473; X86-BMI1NOTBM-NEXT: shll $8, %eax 3474; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3475; X86-BMI1NOTBM-NEXT: retl 3476; 3477; X86-BMI1BMI2-LABEL: bzhi64_32_c1: 3478; X86-BMI1BMI2: # %bb.0: 3479; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 3480; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3481; X86-BMI1BMI2-NEXT: retl 3482; 3483; X64-NOBMI-LABEL: bzhi64_32_c1: 3484; X64-NOBMI: # %bb.0: 3485; X64-NOBMI-NEXT: movl %esi, %ecx 3486; X64-NOBMI-NEXT: movq %rdi, %rax 3487; X64-NOBMI-NEXT: negb %cl 3488; X64-NOBMI-NEXT: shll %cl, %eax 3489; X64-NOBMI-NEXT: # kill: def $cl killed $cl 
killed $ecx 3490; X64-NOBMI-NEXT: shrl %cl, %eax 3491; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3492; X64-NOBMI-NEXT: retq 3493; 3494; X64-BMI1NOTBM-LABEL: bzhi64_32_c1: 3495; X64-BMI1NOTBM: # %bb.0: 3496; X64-BMI1NOTBM-NEXT: shll $8, %esi 3497; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 3498; X64-BMI1NOTBM-NEXT: retq 3499; 3500; X64-BMI1BMI2-LABEL: bzhi64_32_c1: 3501; X64-BMI1BMI2: # %bb.0: 3502; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 3503; X64-BMI1BMI2-NEXT: retq 3504 %truncval = trunc i64 %val to i32 3505 %numhighbits = sub i32 32, %numlowbits 3506 %mask = lshr i32 -1, %numhighbits 3507 %masked = and i32 %mask, %truncval 3508 ret i32 %masked 3509} 3510 3511; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit. 3512; Masking is 64-bit. Then truncation. 3513define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { 3514; X86-NOBMI-LABEL: bzhi64_32_c2: 3515; X86-NOBMI: # %bb.0: 3516; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3517; X86-NOBMI-NEXT: xorl %ecx, %ecx 3518; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3519; X86-NOBMI-NEXT: shll %cl, %eax 3520; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3521; X86-NOBMI-NEXT: shrl %cl, %eax 3522; X86-NOBMI-NEXT: retl 3523; 3524; X86-BMI1NOTBM-LABEL: bzhi64_32_c2: 3525; X86-BMI1NOTBM: # %bb.0: 3526; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al 3527; X86-BMI1NOTBM-NEXT: shll $8, %eax 3528; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3529; X86-BMI1NOTBM-NEXT: retl 3530; 3531; X86-BMI1BMI2-LABEL: bzhi64_32_c2: 3532; X86-BMI1BMI2: # %bb.0: 3533; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 3534; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3535; X86-BMI1BMI2-NEXT: retl 3536; 3537; X64-NOBMI-LABEL: bzhi64_32_c2: 3538; X64-NOBMI: # %bb.0: 3539; X64-NOBMI-NEXT: movl %esi, %ecx 3540; X64-NOBMI-NEXT: movq %rdi, %rax 3541; X64-NOBMI-NEXT: negb %cl 3542; X64-NOBMI-NEXT: shll %cl, %eax 3543; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3544; 
X64-NOBMI-NEXT: shrl %cl, %eax 3545; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3546; X64-NOBMI-NEXT: retq 3547; 3548; X64-BMI1NOTBM-LABEL: bzhi64_32_c2: 3549; X64-BMI1NOTBM: # %bb.0: 3550; X64-BMI1NOTBM-NEXT: shll $8, %esi 3551; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax 3552; X64-BMI1NOTBM-NEXT: retq 3553; 3554; X64-BMI1BMI2-LABEL: bzhi64_32_c2: 3555; X64-BMI1BMI2: # %bb.0: 3556; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 3557; X64-BMI1BMI2-NEXT: retq 3558 %numhighbits = sub i32 32, %numlowbits 3559 %mask = lshr i32 -1, %numhighbits 3560 %zextmask = zext i32 %mask to i64 3561 %masked = and i64 %zextmask, %val 3562 %truncmasked = trunc i64 %masked to i32 3563 ret i32 %truncmasked 3564} 3565 3566; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit. 3567; Masking is 64-bit. Then truncation. 3568define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind { 3569; X86-LABEL: bzhi64_32_c3: 3570; X86: # %bb.0: 3571; X86-NEXT: movb $64, %cl 3572; X86-NEXT: subb {{[0-9]+}}(%esp), %cl 3573; X86-NEXT: xorl %eax, %eax 3574; X86-NEXT: movl $-1, %edx 3575; X86-NEXT: shrdl %cl, %eax, %edx 3576; X86-NEXT: testb $32, %cl 3577; X86-NEXT: jne .LBB42_2 3578; X86-NEXT: # %bb.1: 3579; X86-NEXT: movl %edx, %eax 3580; X86-NEXT: .LBB42_2: 3581; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 3582; X86-NEXT: retl 3583; 3584; X64-NOBMI-LABEL: bzhi64_32_c3: 3585; X64-NOBMI: # %bb.0: 3586; X64-NOBMI-NEXT: movq %rsi, %rcx 3587; X64-NOBMI-NEXT: negb %cl 3588; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 3589; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3590; X64-NOBMI-NEXT: shrq %cl, %rax 3591; X64-NOBMI-NEXT: andl %edi, %eax 3592; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3593; X64-NOBMI-NEXT: retq 3594; 3595; X64-BMI1NOTBM-LABEL: bzhi64_32_c3: 3596; X64-BMI1NOTBM: # %bb.0: 3597; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx 3598; X64-BMI1NOTBM-NEXT: negb %cl 3599; X64-BMI1NOTBM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 3600; 
X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rax
; X64-BMI1NOTBM-NEXT: andl %edi, %eax
; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_32_c3:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: negb %sil
; X64-BMI1BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI1BMI2-NEXT: andl %edi, %eax
; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 4294967295, %numhighbits
  %masked = and i64 %mask, %val
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;

; Base form of pattern d: shl followed by lshr, both by (32 - %numlowbits).
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi32_d0:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1NOTBM-NEXT: shll $8, %eax
; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi32_d0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi32_d0:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi32_d0:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; As bzhi32_d0, but the bit count is an i8: the shift amount is computed in i8
; and then zero-extended to i32.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi32_d1_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1NOTBM-NEXT: shll $8, %eax
; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi32_d1_indexzext:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}

; As bzhi32_d0, but the value is loaded from %w instead of passed directly.
define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi32_d2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shll $8, %ecx
; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi32_d2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi32_d2_load:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi32_d2_load:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
  %val = load i32, i32* %w
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Combines both variations: the value is loaded from %w and the i8 shift amount
; is zero-extended to i32.
define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shll $8, %ecx
; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
  %val = load i32, i32* %w
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}

; 64-bit.

; 64-bit base form of pattern d: shl followed by lshr, both by
; (64 - %numlowbits).
define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB47_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB47_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB47_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB47_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB47_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB47_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_d0:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %edi
; X86-BMI1NOTBM-NEXT: jne .LBB47_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: .LBB47_2:
; X86-BMI1NOTBM-NEXT: movl %edi, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl $0, %edx
; X86-BMI1NOTBM-NEXT: jne .LBB47_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl %esi, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %edx
; X86-BMI1NOTBM-NEXT: .LBB47_4:
; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB47_6
; X86-BMI1NOTBM-NEXT: # %bb.5:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: .LBB47_6:
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB47_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB47_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB47_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB47_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB47_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB47_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_d0:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d0:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ret i64 %masked
}

; As bzhi64_d0, but the bit count is an i8: the shift amount is computed in i8
; and then zero-extended to i64.
define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB48_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB48_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB48_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB48_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB48_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB48_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_d1_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %edi
; X86-BMI1NOTBM-NEXT: jne .LBB48_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: .LBB48_2:
; X86-BMI1NOTBM-NEXT: movl %edi, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl $0, %edx
; X86-BMI1NOTBM-NEXT: jne .LBB48_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl %esi, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %edx
; X86-BMI1NOTBM-NEXT: .LBB48_4:
; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB48_6
; X86-BMI1NOTBM-NEXT: # %bb.5:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: .LBB48_6:
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB48_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB48_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB48_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB48_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB48_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB48_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_d1_indexzext:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}

; As bzhi64_d0, but the value is loaded from %w instead of passed directly.
define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edx
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB49_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB49_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB49_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB49_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB49_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB49_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_d2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movl (%eax), %edx
; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %edi
; X86-BMI1NOTBM-NEXT: jne .LBB49_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: .LBB49_2:
; X86-BMI1NOTBM-NEXT: movl %edi, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl $0, %edx
; X86-BMI1NOTBM-NEXT: jne .LBB49_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl %esi, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %edx
; X86-BMI1NOTBM-NEXT: .LBB49_4:
; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB49_6
; X86-BMI1NOTBM-NEXT: # %bb.5:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: .LBB49_6:
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl (%eax), %edx
; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB49_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB49_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB49_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB49_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB49_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB49_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_d2_load:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d2_load:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ret i64 %masked
}

; Combines both variations: the value is loaded from %w and the i8 shift amount
; is zero-extended to i64.
define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edx
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB50_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB50_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB50_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB50_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB50_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB50_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movl (%eax), %edx
; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %edi
; X86-BMI1NOTBM-NEXT: jne .LBB50_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: .LBB50_2:
; X86-BMI1NOTBM-NEXT: movl %edi, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl $0, %edx
; X86-BMI1NOTBM-NEXT: jne .LBB50_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl %esi, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %edx
; X86-BMI1NOTBM-NEXT: .LBB50_4:
; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB50_6
; X86-BMI1NOTBM-NEXT: # %bb.5:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: .LBB50_6:
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl (%eax), %edx
; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB50_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB50_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB50_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB50_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB50_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB50_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}

; 64-bit, but with 32-bit output

; Everything done in 64-bit, truncation happens last.
define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: shldl %cl, %esi, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB51_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB51_2:
; X86-NOBMI-NEXT: shrdl %cl, %eax, %edx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB51_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-NOBMI-NEXT: .LBB51_4:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_32_d0:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %edx
; X86-BMI1NOTBM-NEXT: shll %cl, %edx
; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB51_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %edx, %eax
; X86-BMI1NOTBM-NEXT: xorl %edx, %edx
; X86-BMI1NOTBM-NEXT: .LBB51_2:
; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edx
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB51_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
; X86-BMI1NOTBM-NEXT: movl %edx, %eax
; X86-BMI1NOTBM-NEXT: .LBB51_4:
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_32_d0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB51_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB51_2:
; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB51_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax
; X86-BMI1BMI2-NEXT: .LBB51_4:
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_32_d0:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_32_d0:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1BMI2-NEXT: retq
  ; Pattern d on the full i64 value: shl then lshr, both by
  ; (64 - %numlowbits).
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ; Only the already-masked result is narrowed to i32.
  %res = trunc i64 %masked to i32
  ret i32 %res
}

;
Truncation happens first; both shifts (and thus the masking) are 32-bit.
define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_d1:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_32_d1:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1NOTBM-NEXT: shll $8, %eax
; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_32_d1:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_d1:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_32_d1:
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: shll $8, %esi
; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1NOTBM-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_32_d1:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
  ; Narrow the i64 input before any shifting.
  %truncval = trunc i64 %val to i32
  ; Pattern d on the i32 value: shl then lshr, both by (32 - %numlowbits).
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %truncval, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

;
---------------------------------------------------------------------------- ; 4561; Constant mask 4562; ---------------------------------------------------------------------------- ; 4563 4564; 32-bit. Each mask below keeps one bit fewer than the test name suggests (mask32 is 0x7FFFFFFF, the low 31 bits; mask16 is 0x7FFF, the low 15 bits; mask8 is 127, the low 7 bits). 4565 4566define i32 @bzhi32_constant_mask32(i32 %val) nounwind { 4567; X86-LABEL: bzhi32_constant_mask32: 4568; X86: # %bb.0: 4569; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4570; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4571; X86-NEXT: retl 4572; 4573; X64-LABEL: bzhi32_constant_mask32: 4574; X64: # %bb.0: 4575; X64-NEXT: movl %edi, %eax 4576; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4577; X64-NEXT: retq 4578 %masked = and i32 %val, 2147483647 4579 ret i32 %masked 4580} 4581 4582define i32 @bzhi32_constant_mask32_load(i32* %val) nounwind { 4583; X86-LABEL: bzhi32_constant_mask32_load: 4584; X86: # %bb.0: 4585; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4586; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4587; X86-NEXT: andl (%ecx), %eax 4588; X86-NEXT: retl 4589; 4590; X64-LABEL: bzhi32_constant_mask32_load: 4591; X64: # %bb.0: 4592; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4593; X64-NEXT: andl (%rdi), %eax 4594; X64-NEXT: retq 4595 %val1 = load i32, i32* %val 4596 %masked = and i32 %val1, 2147483647 4597 ret i32 %masked 4598} 4599 4600define i32 @bzhi32_constant_mask16(i32 %val) nounwind { 4601; X86-LABEL: bzhi32_constant_mask16: 4602; X86: # %bb.0: 4603; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4604; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4605; X86-NEXT: retl 4606; 4607; X64-LABEL: bzhi32_constant_mask16: 4608; X64: # %bb.0: 4609; X64-NEXT: movl %edi, %eax 4610; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4611; X64-NEXT: retq 4612 %masked = and i32 %val, 32767 4613 ret i32 %masked 4614} 4615 4616define i32 @bzhi32_constant_mask16_load(i32* %val) nounwind { 4617; X86-LABEL: bzhi32_constant_mask16_load: 4618; X86: # %bb.0: 4619; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4620; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4621; X86-NEXT: andl 
(%ecx), %eax 4622; X86-NEXT: retl 4623; 4624; X64-LABEL: bzhi32_constant_mask16_load: 4625; X64: # %bb.0: 4626; X64-NEXT: movl $32767, %eax # imm = 0x7FFF 4627; X64-NEXT: andl (%rdi), %eax 4628; X64-NEXT: retq 4629 %val1 = load i32, i32* %val 4630 %masked = and i32 %val1, 32767 4631 ret i32 %masked 4632} 4633 4634define i32 @bzhi32_constant_mask8(i32 %val) nounwind { 4635; X86-LABEL: bzhi32_constant_mask8: 4636; X86: # %bb.0: 4637; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4638; X86-NEXT: andl $127, %eax 4639; X86-NEXT: retl 4640; 4641; X64-LABEL: bzhi32_constant_mask8: 4642; X64: # %bb.0: 4643; X64-NEXT: movl %edi, %eax 4644; X64-NEXT: andl $127, %eax 4645; X64-NEXT: retq 4646 %masked = and i32 %val, 127 4647 ret i32 %masked 4648} 4649 4650define i32 @bzhi32_constant_mask8_load(i32* %val) nounwind { 4651; X86-LABEL: bzhi32_constant_mask8_load: 4652; X86: # %bb.0: 4653; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4654; X86-NEXT: movl (%eax), %eax 4655; X86-NEXT: andl $127, %eax 4656; X86-NEXT: retl 4657; 4658; X64-LABEL: bzhi32_constant_mask8_load: 4659; X64: # %bb.0: 4660; X64-NEXT: movl (%rdi), %eax 4661; X64-NEXT: andl $127, %eax 4662; X64-NEXT: retq 4663 %val1 = load i32, i32* %val 4664 %masked = and i32 %val1, 127 4665 ret i32 %masked 4666} 4667 4668; 64-bit. Here mask64 is 0x3FFFFFFFFFFFFFFF (the low 62 bits); mask32, mask16 and mask8 keep the low 31, 15 and 7 bits of the i64 value. 4669 4670define i64 @bzhi64_constant_mask64(i64 %val) nounwind { 4671; X86-LABEL: bzhi64_constant_mask64: 4672; X86: # %bb.0: 4673; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4674; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF 4675; X86-NEXT: andl {{[0-9]+}}(%esp), %edx 4676; X86-NEXT: retl 4677; 4678; X64-NOBMI-LABEL: bzhi64_constant_mask64: 4679; X64-NOBMI: # %bb.0: 4680; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF 4681; X64-NOBMI-NEXT: andq %rdi, %rax 4682; X64-NOBMI-NEXT: retq 4683; 4684; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64: 4685; X64-BMI1NOTBM: # %bb.0: 4686; X64-BMI1NOTBM-NEXT: movl $15872, %eax # imm = 0x3E00 4687; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax 4688; 
X64-BMI1NOTBM-NEXT: retq 4689; 4690; X64-BMI1TBM-LABEL: bzhi64_constant_mask64: 4691; X64-BMI1TBM: # %bb.0: 4692; X64-BMI1TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 4693; X64-BMI1TBM-NEXT: retq 4694; 4695; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64: 4696; X64-BMI1NOTBMBMI2: # %bb.0: 4697; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al 4698; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, %rdi, %rax 4699; X64-BMI1NOTBMBMI2-NEXT: retq 4700 %masked = and i64 %val, 4611686018427387903 4701 ret i64 %masked 4702} 4703 4704define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind { 4705; X86-LABEL: bzhi64_constant_mask64_load: 4706; X86: # %bb.0: 4707; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4708; X86-NEXT: movl (%ecx), %eax 4709; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF 4710; X86-NEXT: andl 4(%ecx), %edx 4711; X86-NEXT: retl 4712; 4713; X64-NOBMI-LABEL: bzhi64_constant_mask64_load: 4714; X64-NOBMI: # %bb.0: 4715; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF 4716; X64-NOBMI-NEXT: andq (%rdi), %rax 4717; X64-NOBMI-NEXT: retq 4718; 4719; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load: 4720; X64-BMI1NOTBM: # %bb.0: 4721; X64-BMI1NOTBM-NEXT: movl $15872, %eax # imm = 0x3E00 4722; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax 4723; X64-BMI1NOTBM-NEXT: retq 4724; 4725; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load: 4726; X64-BMI1TBM: # %bb.0: 4727; X64-BMI1TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 4728; X64-BMI1TBM-NEXT: retq 4729; 4730; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load: 4731; X64-BMI1NOTBMBMI2: # %bb.0: 4732; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al 4733; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, (%rdi), %rax 4734; X64-BMI1NOTBMBMI2-NEXT: retq 4735 %val1 = load i64, i64* %val 4736 %masked = and i64 %val1, 4611686018427387903 4737 ret i64 %masked 4738} 4739 4740define i64 @bzhi64_constant_mask32(i64 %val) nounwind { 4741; X86-LABEL: bzhi64_constant_mask32: 4742; X86: # %bb.0: 4743; X86-NEXT: movl $2147483647, %eax # 
imm = 0x7FFFFFFF 4744; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4745; X86-NEXT: xorl %edx, %edx 4746; X86-NEXT: retl 4747; 4748; X64-LABEL: bzhi64_constant_mask32: 4749; X64: # %bb.0: 4750; X64-NEXT: movq %rdi, %rax 4751; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4752; X64-NEXT: retq 4753 %masked = and i64 %val, 2147483647 4754 ret i64 %masked 4755} 4756 4757define i64 @bzhi64_constant_mask32_load(i64* %val) nounwind { 4758; X86-LABEL: bzhi64_constant_mask32_load: 4759; X86: # %bb.0: 4760; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4761; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4762; X86-NEXT: andl (%ecx), %eax 4763; X86-NEXT: xorl %edx, %edx 4764; X86-NEXT: retl 4765; 4766; X64-LABEL: bzhi64_constant_mask32_load: 4767; X64: # %bb.0: 4768; X64-NEXT: movq (%rdi), %rax 4769; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4770; X64-NEXT: retq 4771 %val1 = load i64, i64* %val 4772 %masked = and i64 %val1, 2147483647 4773 ret i64 %masked 4774} 4775 4776define i64 @bzhi64_constant_mask16(i64 %val) nounwind { 4777; X86-LABEL: bzhi64_constant_mask16: 4778; X86: # %bb.0: 4779; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4780; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4781; X86-NEXT: xorl %edx, %edx 4782; X86-NEXT: retl 4783; 4784; X64-LABEL: bzhi64_constant_mask16: 4785; X64: # %bb.0: 4786; X64-NEXT: movq %rdi, %rax 4787; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4788; X64-NEXT: retq 4789 %masked = and i64 %val, 32767 4790 ret i64 %masked 4791} 4792 4793define i64 @bzhi64_constant_mask16_load(i64* %val) nounwind { 4794; X86-LABEL: bzhi64_constant_mask16_load: 4795; X86: # %bb.0: 4796; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4797; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4798; X86-NEXT: andl (%ecx), %eax 4799; X86-NEXT: xorl %edx, %edx 4800; X86-NEXT: retl 4801; 4802; X64-LABEL: bzhi64_constant_mask16_load: 4803; X64: # %bb.0: 4804; X64-NEXT: movq (%rdi), %rax 4805; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4806; X64-NEXT: retq 4807 %val1 = load i64, i64* %val 
4808 %masked = and i64 %val1, 32767 4809 ret i64 %masked 4810} 4811 4812define i64 @bzhi64_constant_mask8(i64 %val) nounwind { 4813; X86-LABEL: bzhi64_constant_mask8: 4814; X86: # %bb.0: 4815; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4816; X86-NEXT: andl $127, %eax 4817; X86-NEXT: xorl %edx, %edx 4818; X86-NEXT: retl 4819; 4820; X64-LABEL: bzhi64_constant_mask8: 4821; X64: # %bb.0: 4822; X64-NEXT: movq %rdi, %rax 4823; X64-NEXT: andl $127, %eax 4824; X64-NEXT: retq 4825 %masked = and i64 %val, 127 4826 ret i64 %masked 4827} 4828 4829define i64 @bzhi64_constant_mask8_load(i64* %val) nounwind { 4830; X86-LABEL: bzhi64_constant_mask8_load: 4831; X86: # %bb.0: 4832; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4833; X86-NEXT: movl (%eax), %eax 4834; X86-NEXT: andl $127, %eax 4835; X86-NEXT: xorl %edx, %edx 4836; X86-NEXT: retl 4837; 4838; X64-LABEL: bzhi64_constant_mask8_load: 4839; X64: # %bb.0: 4840; X64-NEXT: movq (%rdi), %rax 4841; X64-NEXT: andl $127, %eax 4842; X64-NEXT: retq 4843 %val1 = load i64, i64* %val 4844 %masked = and i64 %val1, 127 4845 ret i64 %masked 4846} 4847