; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2

; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll

; https://bugs.llvm.org/show_bug.cgi?id=36419
; https://bugs.llvm.org/show_bug.cgi?id=37603
; https://bugs.llvm.org/show_bug.cgi?id=37610

; Patterns (nbits is the number of low bits to keep):
;   a) x &  (1 << nbits) - 1
;   b) x & ~(-1 << nbits)
;   c) x &  (-1 >> (32 - nbits))
;   d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; In every pattern-a function the mask is built as (1 << numlowbits) - 1, i.e.
; a shl of 1 followed by an add of -1, and is then and-ed with the value.
; NOTE(review): the add carries nsw, so the mask is poison when the shift
; produces the sign bit (numlowbits of 31 for i32, 63 for i64). Presumably this
; mirrors the AArch64 sibling test; confirm before touching the flag.

; Baseline: value and bit count are both plain i32 arguments. The BMI2 runs
; expect the whole shl/add/and chain to become a single bzhil; the runs without
; BMI expect the mask to be materialised with movl $1 / shll / decl.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as bzhi32_a0, except that the bit count arrives as a zeroext i8 and is
; zero-extended to i32 before the shift. The expected instructions are the
; same as for bzhi32_a0.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; The value to mask is loaded from %w. The BMI2 runs expect the load to be
; folded into the memory operand of bzhil; the runs without BMI expect it to be
; folded into the final andl.
define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Combination of the two previous variants: the value is loaded from %w and
; the bit count is a zeroext i8 that is zero-extended before the shift.
define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as bzhi32_a0 with the operands of the final 'and' swapped (value first,
; mask second). The expected instructions are the same as for bzhi32_a0.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

; i64 baseline. On x86_64 the BMI2 run expects a single bzhiq. On i686 no bzhi
; is expected even with BMI2: the 64-bit mask is built in two 32-bit halves
; with shldl plus shll (shlxl with BMI2), a testb $32 branch that moves the low
; half into the high half and clears the low half, and addl/adcl $-1 for the
; decrement; each half is then and-ed separately.
define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB5_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB5_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB5_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB5_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 variant with a zeroext i8 bit count that is zero-extended to i64. The
; x86_64 BMI2 output differs from bzhi64_a0 only by a '# kill' annotation
; that widens $esi to $rsi ahead of bzhiq.
define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB6_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB6_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB6_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB6_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 value loaded from %w. The x86_64 BMI2 run expects the load folded into
; bzhiq. The i686 runs save and restore %esi, keep the pointer in it, and 'and'
; the two mask halves with (%esi) and 4(%esi).
define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB7_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB7_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB7_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB7_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 value loaded from %w combined with a zeroext i8 bit count. As in
; bzhi64_a1_indexzext, the x86_64 BMI2 output carries a '# kill' annotation
; ahead of bzhiq.
define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB8_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB8_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB8_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB8_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Same as bzhi64_a0 with the operands of the final 'and' swapped (value first,
; mask second). The expected instructions are the same as for bzhi64_a0.
define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB9_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB9_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB9_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB9_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern b.
; 32-bit
; ---------------------------------------------------------------------------- ;

; In every pattern-b function the mask is built as ~(-1 << numlowbits), i.e. a
; shl of -1 followed by an xor with -1, and is then and-ed with the value.

; Baseline: value and bit count are both plain i32 arguments. The BMI2 runs
; expect a single bzhil; the runs without BMI expect movl $-1 / shll / notl
; followed by andl.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as bzhi32_b0, except that the bit count arrives as a zeroext i8 and is
; zero-extended to i32 before the shift. The expected instructions are the
; same as for bzhi32_b0.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; The value to mask is loaded from %w. The BMI2 runs expect the load to be
; folded into the memory operand of bzhil; the runs without BMI expect it to be
; folded into the final andl.
define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Combination of the two previous variants: the value is loaded from %w and
; the bit count is a zeroext i8 that is zero-extended before the shift.
define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as bzhi32_b0 with the operands of the final 'and' swapped (value first,
; mask second). The expected instructions are the same as for bzhi32_b0.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

; i64 baseline. On x86_64 the BMI2 run expects a single bzhiq. On i686 no bzhi
; is expected even with BMI2: the shifted -1 is built in two 32-bit halves with
; shll (shlxl with BMI2) plus shldl and a testb $32 branch that moves the low
; half into the high half and clears the low half. Without BMI both halves are
; inverted with notl and then and-ed; with BMI2 the inversion is folded into
; two andnl instructions, and %esi is saved and restored as a scratch register.
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB15_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB15_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %eax
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB15_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %eax
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB15_2:
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 variant with a zeroext i8 bit count that is zero-extended to i64. The
; x86_64 BMI2 output differs from bzhi64_b0 only by a '# kill' annotation
; that widens $esi to $rsi ahead of bzhiq.
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB16_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB16_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %eax
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB16_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %eax
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB16_2:
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 value loaded from %w. The x86_64 BMI2 run expects the load folded into
; bzhiq. On i686 the two halves are read from offsets 0 and 4 of the pointer:
; via andl from (%esi) and 4(%esi) without BMI, and via andnl from (%eax) and
; 4(%eax) with BMI2.
define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB17_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB17_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB17_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %edx
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB17_2:
; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 value loaded from %w combined with a zeroext i8 bit count. As in
; bzhi64_b1_indexzext, the x86_64 BMI2 output carries a '# kill' annotation
; ahead of bzhiq.
define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB18_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB18_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB18_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %edx
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB18_2:
; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Same as bzhi64_b0 with the operands of the final 'and' swapped (value first,
; mask second). The expected instructions are the same as for bzhi64_b0.
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB19_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB19_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %eax
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB19_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %eax
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB19_2:
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_c0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl $32, %ecx
; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_c0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_c0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $32, %ecx
; X64-NOBMI-NEXT:    subl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %edi
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %edi
; X64-NOBMI-NEXT:    movl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_c0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
1046 %numhighbits = sub i32 32, %numlowbits 1047 %mask = lshr i32 -1, %numhighbits 1048 %masked = and i32 %mask, %val 1049 ret i32 %masked 1050} 1051 1052define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { 1053; X86-NOBMI-LABEL: bzhi32_c1_indexzext: 1054; X86-NOBMI: # %bb.0: 1055; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1056; X86-NOBMI-NEXT: movb $32, %cl 1057; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1058; X86-NOBMI-NEXT: shll %cl, %eax 1059; X86-NOBMI-NEXT: shrl %cl, %eax 1060; X86-NOBMI-NEXT: retl 1061; 1062; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext: 1063; X86-BMI1BMI2: # %bb.0: 1064; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1065; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1066; X86-BMI1BMI2-NEXT: retl 1067; 1068; X64-NOBMI-LABEL: bzhi32_c1_indexzext: 1069; X64-NOBMI: # %bb.0: 1070; X64-NOBMI-NEXT: movb $32, %cl 1071; X64-NOBMI-NEXT: subb %sil, %cl 1072; X64-NOBMI-NEXT: shll %cl, %edi 1073; X64-NOBMI-NEXT: shrl %cl, %edi 1074; X64-NOBMI-NEXT: movl %edi, %eax 1075; X64-NOBMI-NEXT: retq 1076; 1077; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext: 1078; X64-BMI1BMI2: # %bb.0: 1079; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1080; X64-BMI1BMI2-NEXT: retq 1081 %numhighbits = sub i8 32, %numlowbits 1082 %sh_prom = zext i8 %numhighbits to i32 1083 %mask = lshr i32 -1, %sh_prom 1084 %masked = and i32 %mask, %val 1085 ret i32 %masked 1086} 1087 1088define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { 1089; X86-NOBMI-LABEL: bzhi32_c2_load: 1090; X86-NOBMI: # %bb.0: 1091; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1092; X86-NOBMI-NEXT: movl (%eax), %eax 1093; X86-NOBMI-NEXT: movl $32, %ecx 1094; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1095; X86-NOBMI-NEXT: shll %cl, %eax 1096; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1097; X86-NOBMI-NEXT: shrl %cl, %eax 1098; X86-NOBMI-NEXT: retl 1099; 1100; X86-BMI1BMI2-LABEL: bzhi32_c2_load: 1101; X86-BMI1BMI2: # %bb.0: 1102; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1103; 
X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1104; X86-BMI1BMI2-NEXT: bzhil %eax, (%ecx), %eax 1105; X86-BMI1BMI2-NEXT: retl 1106; 1107; X64-NOBMI-LABEL: bzhi32_c2_load: 1108; X64-NOBMI: # %bb.0: 1109; X64-NOBMI-NEXT: movl (%rdi), %eax 1110; X64-NOBMI-NEXT: movl $32, %ecx 1111; X64-NOBMI-NEXT: subl %esi, %ecx 1112; X64-NOBMI-NEXT: shll %cl, %eax 1113; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1114; X64-NOBMI-NEXT: shrl %cl, %eax 1115; X64-NOBMI-NEXT: retq 1116; 1117; X64-BMI1BMI2-LABEL: bzhi32_c2_load: 1118; X64-BMI1BMI2: # %bb.0: 1119; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1120; X64-BMI1BMI2-NEXT: retq 1121 %val = load i32, i32* %w 1122 %numhighbits = sub i32 32, %numlowbits 1123 %mask = lshr i32 -1, %numhighbits 1124 %masked = and i32 %mask, %val 1125 ret i32 %masked 1126} 1127 1128define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { 1129; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: 1130; X86-NOBMI: # %bb.0: 1131; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1132; X86-NOBMI-NEXT: movl (%eax), %eax 1133; X86-NOBMI-NEXT: movb $32, %cl 1134; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1135; X86-NOBMI-NEXT: shll %cl, %eax 1136; X86-NOBMI-NEXT: shrl %cl, %eax 1137; X86-NOBMI-NEXT: retl 1138; 1139; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: 1140; X86-BMI1BMI2: # %bb.0: 1141; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1142; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1143; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 1144; X86-BMI1BMI2-NEXT: retl 1145; 1146; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: 1147; X64-NOBMI: # %bb.0: 1148; X64-NOBMI-NEXT: movl (%rdi), %eax 1149; X64-NOBMI-NEXT: movb $32, %cl 1150; X64-NOBMI-NEXT: subb %sil, %cl 1151; X64-NOBMI-NEXT: shll %cl, %eax 1152; X64-NOBMI-NEXT: shrl %cl, %eax 1153; X64-NOBMI-NEXT: retq 1154; 1155; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: 1156; X64-BMI1BMI2: # %bb.0: 1157; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1158; X64-BMI1BMI2-NEXT: retq 1159 %val = load i32, 
i32* %w 1160 %numhighbits = sub i8 32, %numlowbits 1161 %sh_prom = zext i8 %numhighbits to i32 1162 %mask = lshr i32 -1, %sh_prom 1163 %masked = and i32 %mask, %val 1164 ret i32 %masked 1165} 1166 1167define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { 1168; X86-NOBMI-LABEL: bzhi32_c4_commutative: 1169; X86-NOBMI: # %bb.0: 1170; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1171; X86-NOBMI-NEXT: movl $32, %ecx 1172; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1173; X86-NOBMI-NEXT: shll %cl, %eax 1174; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1175; X86-NOBMI-NEXT: shrl %cl, %eax 1176; X86-NOBMI-NEXT: retl 1177; 1178; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative: 1179; X86-BMI1BMI2: # %bb.0: 1180; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1181; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1182; X86-BMI1BMI2-NEXT: retl 1183; 1184; X64-NOBMI-LABEL: bzhi32_c4_commutative: 1185; X64-NOBMI: # %bb.0: 1186; X64-NOBMI-NEXT: movl $32, %ecx 1187; X64-NOBMI-NEXT: subl %esi, %ecx 1188; X64-NOBMI-NEXT: shll %cl, %edi 1189; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1190; X64-NOBMI-NEXT: shrl %cl, %edi 1191; X64-NOBMI-NEXT: movl %edi, %eax 1192; X64-NOBMI-NEXT: retq 1193; 1194; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative: 1195; X64-BMI1BMI2: # %bb.0: 1196; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1197; X64-BMI1BMI2-NEXT: retq 1198 %numhighbits = sub i32 32, %numlowbits 1199 %mask = lshr i32 -1, %numhighbits 1200 %masked = and i32 %val, %mask ; swapped order 1201 ret i32 %masked 1202} 1203 1204; 64-bit 1205 1206define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { 1207; X86-NOBMI-LABEL: bzhi64_c0: 1208; X86-NOBMI: # %bb.0: 1209; X86-NOBMI-NEXT: movl $64, %ecx 1210; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1211; X86-NOBMI-NEXT: movl $-1, %eax 1212; X86-NOBMI-NEXT: movl $-1, %edx 1213; X86-NOBMI-NEXT: shrl %cl, %edx 1214; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax 1215; X86-NOBMI-NEXT: testb $32, %cl 1216; X86-NOBMI-NEXT: je 
.LBB25_2 1217; X86-NOBMI-NEXT: # %bb.1: 1218; X86-NOBMI-NEXT: movl %edx, %eax 1219; X86-NOBMI-NEXT: xorl %edx, %edx 1220; X86-NOBMI-NEXT: .LBB25_2: 1221; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1222; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1223; X86-NOBMI-NEXT: retl 1224; 1225; X86-BMI1BMI2-LABEL: bzhi64_c0: 1226; X86-BMI1BMI2: # %bb.0: 1227; X86-BMI1BMI2-NEXT: movl $64, %ecx 1228; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1229; X86-BMI1BMI2-NEXT: movl $-1, %eax 1230; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx 1231; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax 1232; X86-BMI1BMI2-NEXT: testb $32, %cl 1233; X86-BMI1BMI2-NEXT: je .LBB25_2 1234; X86-BMI1BMI2-NEXT: # %bb.1: 1235; X86-BMI1BMI2-NEXT: movl %edx, %eax 1236; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1237; X86-BMI1BMI2-NEXT: .LBB25_2: 1238; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1239; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1240; X86-BMI1BMI2-NEXT: retl 1241; 1242; X64-NOBMI-LABEL: bzhi64_c0: 1243; X64-NOBMI: # %bb.0: 1244; X64-NOBMI-NEXT: movl $64, %ecx 1245; X64-NOBMI-NEXT: subl %esi, %ecx 1246; X64-NOBMI-NEXT: shlq %cl, %rdi 1247; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1248; X64-NOBMI-NEXT: shrq %cl, %rdi 1249; X64-NOBMI-NEXT: movq %rdi, %rax 1250; X64-NOBMI-NEXT: retq 1251; 1252; X64-BMI1BMI2-LABEL: bzhi64_c0: 1253; X64-BMI1BMI2: # %bb.0: 1254; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1255; X64-BMI1BMI2-NEXT: retq 1256 %numhighbits = sub i64 64, %numlowbits 1257 %mask = lshr i64 -1, %numhighbits 1258 %masked = and i64 %mask, %val 1259 ret i64 %masked 1260} 1261 1262define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { 1263; X86-NOBMI-LABEL: bzhi64_c1_indexzext: 1264; X86-NOBMI: # %bb.0: 1265; X86-NOBMI-NEXT: movb $64, %cl 1266; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1267; X86-NOBMI-NEXT: movl $-1, %eax 1268; X86-NOBMI-NEXT: movl $-1, %edx 1269; X86-NOBMI-NEXT: shrl %cl, %edx 1270; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax 1271; X86-NOBMI-NEXT: testb $32, %cl 
1272; X86-NOBMI-NEXT: je .LBB26_2 1273; X86-NOBMI-NEXT: # %bb.1: 1274; X86-NOBMI-NEXT: movl %edx, %eax 1275; X86-NOBMI-NEXT: xorl %edx, %edx 1276; X86-NOBMI-NEXT: .LBB26_2: 1277; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1278; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1279; X86-NOBMI-NEXT: retl 1280; 1281; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext: 1282; X86-BMI1BMI2: # %bb.0: 1283; X86-BMI1BMI2-NEXT: movb $64, %cl 1284; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1285; X86-BMI1BMI2-NEXT: movl $-1, %eax 1286; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx 1287; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax 1288; X86-BMI1BMI2-NEXT: testb $32, %cl 1289; X86-BMI1BMI2-NEXT: je .LBB26_2 1290; X86-BMI1BMI2-NEXT: # %bb.1: 1291; X86-BMI1BMI2-NEXT: movl %edx, %eax 1292; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1293; X86-BMI1BMI2-NEXT: .LBB26_2: 1294; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1295; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1296; X86-BMI1BMI2-NEXT: retl 1297; 1298; X64-NOBMI-LABEL: bzhi64_c1_indexzext: 1299; X64-NOBMI: # %bb.0: 1300; X64-NOBMI-NEXT: movb $64, %cl 1301; X64-NOBMI-NEXT: subb %sil, %cl 1302; X64-NOBMI-NEXT: shlq %cl, %rdi 1303; X64-NOBMI-NEXT: shrq %cl, %rdi 1304; X64-NOBMI-NEXT: movq %rdi, %rax 1305; X64-NOBMI-NEXT: retq 1306; 1307; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext: 1308; X64-BMI1BMI2: # %bb.0: 1309; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 1310; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1311; X64-BMI1BMI2-NEXT: retq 1312 %numhighbits = sub i8 64, %numlowbits 1313 %sh_prom = zext i8 %numhighbits to i64 1314 %mask = lshr i64 -1, %sh_prom 1315 %masked = and i64 %mask, %val 1316 ret i64 %masked 1317} 1318 1319define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { 1320; X86-NOBMI-LABEL: bzhi64_c2_load: 1321; X86-NOBMI: # %bb.0: 1322; X86-NOBMI-NEXT: pushl %esi 1323; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1324; X86-NOBMI-NEXT: movl $64, %ecx 1325; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1326; X86-NOBMI-NEXT: 
movl $-1, %eax 1327; X86-NOBMI-NEXT: movl $-1, %edx 1328; X86-NOBMI-NEXT: shrl %cl, %edx 1329; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax 1330; X86-NOBMI-NEXT: testb $32, %cl 1331; X86-NOBMI-NEXT: je .LBB27_2 1332; X86-NOBMI-NEXT: # %bb.1: 1333; X86-NOBMI-NEXT: movl %edx, %eax 1334; X86-NOBMI-NEXT: xorl %edx, %edx 1335; X86-NOBMI-NEXT: .LBB27_2: 1336; X86-NOBMI-NEXT: andl (%esi), %eax 1337; X86-NOBMI-NEXT: andl 4(%esi), %edx 1338; X86-NOBMI-NEXT: popl %esi 1339; X86-NOBMI-NEXT: retl 1340; 1341; X86-BMI1BMI2-LABEL: bzhi64_c2_load: 1342; X86-BMI1BMI2: # %bb.0: 1343; X86-BMI1BMI2-NEXT: pushl %esi 1344; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1345; X86-BMI1BMI2-NEXT: movl $64, %ecx 1346; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1347; X86-BMI1BMI2-NEXT: movl $-1, %eax 1348; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx 1349; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax 1350; X86-BMI1BMI2-NEXT: testb $32, %cl 1351; X86-BMI1BMI2-NEXT: je .LBB27_2 1352; X86-BMI1BMI2-NEXT: # %bb.1: 1353; X86-BMI1BMI2-NEXT: movl %edx, %eax 1354; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1355; X86-BMI1BMI2-NEXT: .LBB27_2: 1356; X86-BMI1BMI2-NEXT: andl (%esi), %eax 1357; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx 1358; X86-BMI1BMI2-NEXT: popl %esi 1359; X86-BMI1BMI2-NEXT: retl 1360; 1361; X64-NOBMI-LABEL: bzhi64_c2_load: 1362; X64-NOBMI: # %bb.0: 1363; X64-NOBMI-NEXT: movq (%rdi), %rax 1364; X64-NOBMI-NEXT: movl $64, %ecx 1365; X64-NOBMI-NEXT: subl %esi, %ecx 1366; X64-NOBMI-NEXT: shlq %cl, %rax 1367; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1368; X64-NOBMI-NEXT: shrq %cl, %rax 1369; X64-NOBMI-NEXT: retq 1370; 1371; X64-BMI1BMI2-LABEL: bzhi64_c2_load: 1372; X64-BMI1BMI2: # %bb.0: 1373; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1374; X64-BMI1BMI2-NEXT: retq 1375 %val = load i64, i64* %w 1376 %numhighbits = sub i64 64, %numlowbits 1377 %mask = lshr i64 -1, %numhighbits 1378 %masked = and i64 %mask, %val 1379 ret i64 %masked 1380} 1381 1382define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 
%numlowbits) nounwind { 1383; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext: 1384; X86-NOBMI: # %bb.0: 1385; X86-NOBMI-NEXT: pushl %esi 1386; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1387; X86-NOBMI-NEXT: movb $64, %cl 1388; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1389; X86-NOBMI-NEXT: movl $-1, %eax 1390; X86-NOBMI-NEXT: movl $-1, %edx 1391; X86-NOBMI-NEXT: shrl %cl, %edx 1392; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax 1393; X86-NOBMI-NEXT: testb $32, %cl 1394; X86-NOBMI-NEXT: je .LBB28_2 1395; X86-NOBMI-NEXT: # %bb.1: 1396; X86-NOBMI-NEXT: movl %edx, %eax 1397; X86-NOBMI-NEXT: xorl %edx, %edx 1398; X86-NOBMI-NEXT: .LBB28_2: 1399; X86-NOBMI-NEXT: andl (%esi), %eax 1400; X86-NOBMI-NEXT: andl 4(%esi), %edx 1401; X86-NOBMI-NEXT: popl %esi 1402; X86-NOBMI-NEXT: retl 1403; 1404; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: 1405; X86-BMI1BMI2: # %bb.0: 1406; X86-BMI1BMI2-NEXT: pushl %esi 1407; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1408; X86-BMI1BMI2-NEXT: movb $64, %cl 1409; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1410; X86-BMI1BMI2-NEXT: movl $-1, %eax 1411; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx 1412; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax 1413; X86-BMI1BMI2-NEXT: testb $32, %cl 1414; X86-BMI1BMI2-NEXT: je .LBB28_2 1415; X86-BMI1BMI2-NEXT: # %bb.1: 1416; X86-BMI1BMI2-NEXT: movl %edx, %eax 1417; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1418; X86-BMI1BMI2-NEXT: .LBB28_2: 1419; X86-BMI1BMI2-NEXT: andl (%esi), %eax 1420; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx 1421; X86-BMI1BMI2-NEXT: popl %esi 1422; X86-BMI1BMI2-NEXT: retl 1423; 1424; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: 1425; X64-NOBMI: # %bb.0: 1426; X64-NOBMI-NEXT: movq (%rdi), %rax 1427; X64-NOBMI-NEXT: movb $64, %cl 1428; X64-NOBMI-NEXT: subb %sil, %cl 1429; X64-NOBMI-NEXT: shlq %cl, %rax 1430; X64-NOBMI-NEXT: shrq %cl, %rax 1431; X64-NOBMI-NEXT: retq 1432; 1433; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: 1434; X64-BMI1BMI2: # %bb.0: 1435; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi 
1436; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1437; X64-BMI1BMI2-NEXT: retq 1438 %val = load i64, i64* %w 1439 %numhighbits = sub i8 64, %numlowbits 1440 %sh_prom = zext i8 %numhighbits to i64 1441 %mask = lshr i64 -1, %sh_prom 1442 %masked = and i64 %mask, %val 1443 ret i64 %masked 1444} 1445 1446define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { 1447; X86-NOBMI-LABEL: bzhi64_c4_commutative: 1448; X86-NOBMI: # %bb.0: 1449; X86-NOBMI-NEXT: movl $64, %ecx 1450; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1451; X86-NOBMI-NEXT: movl $-1, %eax 1452; X86-NOBMI-NEXT: movl $-1, %edx 1453; X86-NOBMI-NEXT: shrl %cl, %edx 1454; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax 1455; X86-NOBMI-NEXT: testb $32, %cl 1456; X86-NOBMI-NEXT: je .LBB29_2 1457; X86-NOBMI-NEXT: # %bb.1: 1458; X86-NOBMI-NEXT: movl %edx, %eax 1459; X86-NOBMI-NEXT: xorl %edx, %edx 1460; X86-NOBMI-NEXT: .LBB29_2: 1461; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1462; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1463; X86-NOBMI-NEXT: retl 1464; 1465; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative: 1466; X86-BMI1BMI2: # %bb.0: 1467; X86-BMI1BMI2-NEXT: movl $64, %ecx 1468; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1469; X86-BMI1BMI2-NEXT: movl $-1, %eax 1470; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx 1471; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax 1472; X86-BMI1BMI2-NEXT: testb $32, %cl 1473; X86-BMI1BMI2-NEXT: je .LBB29_2 1474; X86-BMI1BMI2-NEXT: # %bb.1: 1475; X86-BMI1BMI2-NEXT: movl %edx, %eax 1476; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1477; X86-BMI1BMI2-NEXT: .LBB29_2: 1478; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1479; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 1480; X86-BMI1BMI2-NEXT: retl 1481; 1482; X64-NOBMI-LABEL: bzhi64_c4_commutative: 1483; X64-NOBMI: # %bb.0: 1484; X64-NOBMI-NEXT: movl $64, %ecx 1485; X64-NOBMI-NEXT: subl %esi, %ecx 1486; X64-NOBMI-NEXT: shlq %cl, %rdi 1487; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1488; X64-NOBMI-NEXT: shrq %cl, %rdi 1489; 
X64-NOBMI-NEXT: movq %rdi, %rax 1490; X64-NOBMI-NEXT: retq 1491; 1492; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative: 1493; X64-BMI1BMI2: # %bb.0: 1494; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1495; X64-BMI1BMI2-NEXT: retq 1496 %numhighbits = sub i64 64, %numlowbits 1497 %mask = lshr i64 -1, %numhighbits 1498 %masked = and i64 %val, %mask ; swapped order 1499 ret i64 %masked 1500} 1501 1502; ---------------------------------------------------------------------------- ; 1503; Pattern d. 32-bit. 1504; ---------------------------------------------------------------------------- ; 1505 1506define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { 1507; X86-NOBMI-LABEL: bzhi32_d0: 1508; X86-NOBMI: # %bb.0: 1509; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1510; X86-NOBMI-NEXT: movl $32, %ecx 1511; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1512; X86-NOBMI-NEXT: shll %cl, %eax 1513; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1514; X86-NOBMI-NEXT: shrl %cl, %eax 1515; X86-NOBMI-NEXT: retl 1516; 1517; X86-BMI1BMI2-LABEL: bzhi32_d0: 1518; X86-BMI1BMI2: # %bb.0: 1519; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1520; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1521; X86-BMI1BMI2-NEXT: retl 1522; 1523; X64-NOBMI-LABEL: bzhi32_d0: 1524; X64-NOBMI: # %bb.0: 1525; X64-NOBMI-NEXT: movl $32, %ecx 1526; X64-NOBMI-NEXT: subl %esi, %ecx 1527; X64-NOBMI-NEXT: shll %cl, %edi 1528; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1529; X64-NOBMI-NEXT: shrl %cl, %edi 1530; X64-NOBMI-NEXT: movl %edi, %eax 1531; X64-NOBMI-NEXT: retq 1532; 1533; X64-BMI1BMI2-LABEL: bzhi32_d0: 1534; X64-BMI1BMI2: # %bb.0: 1535; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1536; X64-BMI1BMI2-NEXT: retq 1537 %numhighbits = sub i32 32, %numlowbits 1538 %highbitscleared = shl i32 %val, %numhighbits 1539 %masked = lshr i32 %highbitscleared, %numhighbits 1540 ret i32 %masked 1541} 1542 1543define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { 1544; X86-NOBMI-LABEL: 
bzhi32_d1_indexzext: 1545; X86-NOBMI: # %bb.0: 1546; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1547; X86-NOBMI-NEXT: movb $32, %cl 1548; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1549; X86-NOBMI-NEXT: shll %cl, %eax 1550; X86-NOBMI-NEXT: shrl %cl, %eax 1551; X86-NOBMI-NEXT: retl 1552; 1553; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext: 1554; X86-BMI1BMI2: # %bb.0: 1555; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 1556; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1557; X86-BMI1BMI2-NEXT: retl 1558; 1559; X64-NOBMI-LABEL: bzhi32_d1_indexzext: 1560; X64-NOBMI: # %bb.0: 1561; X64-NOBMI-NEXT: movb $32, %cl 1562; X64-NOBMI-NEXT: subb %sil, %cl 1563; X64-NOBMI-NEXT: shll %cl, %edi 1564; X64-NOBMI-NEXT: shrl %cl, %edi 1565; X64-NOBMI-NEXT: movl %edi, %eax 1566; X64-NOBMI-NEXT: retq 1567; 1568; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext: 1569; X64-BMI1BMI2: # %bb.0: 1570; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax 1571; X64-BMI1BMI2-NEXT: retq 1572 %numhighbits = sub i8 32, %numlowbits 1573 %sh_prom = zext i8 %numhighbits to i32 1574 %highbitscleared = shl i32 %val, %sh_prom 1575 %masked = lshr i32 %highbitscleared, %sh_prom 1576 ret i32 %masked 1577} 1578 1579define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { 1580; X86-NOBMI-LABEL: bzhi32_d2_load: 1581; X86-NOBMI: # %bb.0: 1582; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1583; X86-NOBMI-NEXT: movl (%eax), %eax 1584; X86-NOBMI-NEXT: movl $32, %ecx 1585; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1586; X86-NOBMI-NEXT: shll %cl, %eax 1587; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1588; X86-NOBMI-NEXT: shrl %cl, %eax 1589; X86-NOBMI-NEXT: retl 1590; 1591; X86-BMI1BMI2-LABEL: bzhi32_d2_load: 1592; X86-BMI1BMI2: # %bb.0: 1593; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1594; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1595; X86-BMI1BMI2-NEXT: bzhil %eax, (%ecx), %eax 1596; X86-BMI1BMI2-NEXT: retl 1597; 1598; X64-NOBMI-LABEL: bzhi32_d2_load: 1599; X64-NOBMI: # %bb.0: 1600; X64-NOBMI-NEXT: 
movl (%rdi), %eax 1601; X64-NOBMI-NEXT: movl $32, %ecx 1602; X64-NOBMI-NEXT: subl %esi, %ecx 1603; X64-NOBMI-NEXT: shll %cl, %eax 1604; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1605; X64-NOBMI-NEXT: shrl %cl, %eax 1606; X64-NOBMI-NEXT: retq 1607; 1608; X64-BMI1BMI2-LABEL: bzhi32_d2_load: 1609; X64-BMI1BMI2: # %bb.0: 1610; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1611; X64-BMI1BMI2-NEXT: retq 1612 %val = load i32, i32* %w 1613 %numhighbits = sub i32 32, %numlowbits 1614 %highbitscleared = shl i32 %val, %numhighbits 1615 %masked = lshr i32 %highbitscleared, %numhighbits 1616 ret i32 %masked 1617} 1618 1619define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { 1620; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext: 1621; X86-NOBMI: # %bb.0: 1622; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1623; X86-NOBMI-NEXT: movl (%eax), %eax 1624; X86-NOBMI-NEXT: movb $32, %cl 1625; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1626; X86-NOBMI-NEXT: shll %cl, %eax 1627; X86-NOBMI-NEXT: shrl %cl, %eax 1628; X86-NOBMI-NEXT: retl 1629; 1630; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: 1631; X86-BMI1BMI2: # %bb.0: 1632; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1633; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 1634; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax 1635; X86-BMI1BMI2-NEXT: retl 1636; 1637; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: 1638; X64-NOBMI: # %bb.0: 1639; X64-NOBMI-NEXT: movl (%rdi), %eax 1640; X64-NOBMI-NEXT: movb $32, %cl 1641; X64-NOBMI-NEXT: subb %sil, %cl 1642; X64-NOBMI-NEXT: shll %cl, %eax 1643; X64-NOBMI-NEXT: shrl %cl, %eax 1644; X64-NOBMI-NEXT: retq 1645; 1646; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: 1647; X64-BMI1BMI2: # %bb.0: 1648; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax 1649; X64-BMI1BMI2-NEXT: retq 1650 %val = load i32, i32* %w 1651 %numhighbits = sub i8 32, %numlowbits 1652 %sh_prom = zext i8 %numhighbits to i32 1653 %highbitscleared = shl i32 %val, %sh_prom 1654 %masked = lshr i32 %highbitscleared, 
%sh_prom 1655 ret i32 %masked 1656} 1657 1658; 64-bit. 1659 1660define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { 1661; X86-NOBMI-LABEL: bzhi64_d0: 1662; X86-NOBMI: # %bb.0: 1663; X86-NOBMI-NEXT: pushl %ebx 1664; X86-NOBMI-NEXT: pushl %edi 1665; X86-NOBMI-NEXT: pushl %esi 1666; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1667; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1668; X86-NOBMI-NEXT: movl $64, %ecx 1669; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx 1670; X86-NOBMI-NEXT: movl %edx, %esi 1671; X86-NOBMI-NEXT: shll %cl, %esi 1672; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 1673; X86-NOBMI-NEXT: testb $32, %cl 1674; X86-NOBMI-NEXT: movl %esi, %edi 1675; X86-NOBMI-NEXT: jne .LBB34_2 1676; X86-NOBMI-NEXT: # %bb.1: 1677; X86-NOBMI-NEXT: movl %eax, %edi 1678; X86-NOBMI-NEXT: .LBB34_2: 1679; X86-NOBMI-NEXT: movl %edi, %eax 1680; X86-NOBMI-NEXT: shrl %cl, %eax 1681; X86-NOBMI-NEXT: xorl %ebx, %ebx 1682; X86-NOBMI-NEXT: testb $32, %cl 1683; X86-NOBMI-NEXT: movl $0, %edx 1684; X86-NOBMI-NEXT: jne .LBB34_4 1685; X86-NOBMI-NEXT: # %bb.3: 1686; X86-NOBMI-NEXT: movl %esi, %ebx 1687; X86-NOBMI-NEXT: movl %eax, %edx 1688; X86-NOBMI-NEXT: .LBB34_4: 1689; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 1690; X86-NOBMI-NEXT: testb $32, %cl 1691; X86-NOBMI-NEXT: jne .LBB34_6 1692; X86-NOBMI-NEXT: # %bb.5: 1693; X86-NOBMI-NEXT: movl %ebx, %eax 1694; X86-NOBMI-NEXT: .LBB34_6: 1695; X86-NOBMI-NEXT: popl %esi 1696; X86-NOBMI-NEXT: popl %edi 1697; X86-NOBMI-NEXT: popl %ebx 1698; X86-NOBMI-NEXT: retl 1699; 1700; X86-BMI1BMI2-LABEL: bzhi64_d0: 1701; X86-BMI1BMI2: # %bb.0: 1702; X86-BMI1BMI2-NEXT: pushl %edi 1703; X86-BMI1BMI2-NEXT: pushl %esi 1704; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1705; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1706; X86-BMI1BMI2-NEXT: movl $64, %ecx 1707; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx 1708; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi 1709; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi 1710; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1711; 
X86-BMI1BMI2-NEXT: testb $32, %cl 1712; X86-BMI1BMI2-NEXT: je .LBB34_2 1713; X86-BMI1BMI2-NEXT: # %bb.1: 1714; X86-BMI1BMI2-NEXT: movl %edi, %esi 1715; X86-BMI1BMI2-NEXT: movl $0, %edi 1716; X86-BMI1BMI2-NEXT: .LBB34_2: 1717; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax 1718; X86-BMI1BMI2-NEXT: jne .LBB34_4 1719; X86-BMI1BMI2-NEXT: # %bb.3: 1720; X86-BMI1BMI2-NEXT: movl %eax, %edx 1721; X86-BMI1BMI2-NEXT: .LBB34_4: 1722; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi 1723; X86-BMI1BMI2-NEXT: testb $32, %cl 1724; X86-BMI1BMI2-NEXT: jne .LBB34_6 1725; X86-BMI1BMI2-NEXT: # %bb.5: 1726; X86-BMI1BMI2-NEXT: movl %edi, %eax 1727; X86-BMI1BMI2-NEXT: .LBB34_6: 1728; X86-BMI1BMI2-NEXT: popl %esi 1729; X86-BMI1BMI2-NEXT: popl %edi 1730; X86-BMI1BMI2-NEXT: retl 1731; 1732; X64-NOBMI-LABEL: bzhi64_d0: 1733; X64-NOBMI: # %bb.0: 1734; X64-NOBMI-NEXT: movl $64, %ecx 1735; X64-NOBMI-NEXT: subl %esi, %ecx 1736; X64-NOBMI-NEXT: shlq %cl, %rdi 1737; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1738; X64-NOBMI-NEXT: shrq %cl, %rdi 1739; X64-NOBMI-NEXT: movq %rdi, %rax 1740; X64-NOBMI-NEXT: retq 1741; 1742; X64-BMI1BMI2-LABEL: bzhi64_d0: 1743; X64-BMI1BMI2: # %bb.0: 1744; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1745; X64-BMI1BMI2-NEXT: retq 1746 %numhighbits = sub i64 64, %numlowbits 1747 %highbitscleared = shl i64 %val, %numhighbits 1748 %masked = lshr i64 %highbitscleared, %numhighbits 1749 ret i64 %masked 1750} 1751 1752define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { 1753; X86-NOBMI-LABEL: bzhi64_d1_indexzext: 1754; X86-NOBMI: # %bb.0: 1755; X86-NOBMI-NEXT: pushl %ebx 1756; X86-NOBMI-NEXT: pushl %edi 1757; X86-NOBMI-NEXT: pushl %esi 1758; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1759; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 1760; X86-NOBMI-NEXT: movb $64, %cl 1761; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 1762; X86-NOBMI-NEXT: movl %edx, %esi 1763; X86-NOBMI-NEXT: shll %cl, %esi 1764; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 1765; X86-NOBMI-NEXT: testb 
$32, %cl 1766; X86-NOBMI-NEXT: movl %esi, %edi 1767; X86-NOBMI-NEXT: jne .LBB35_2 1768; X86-NOBMI-NEXT: # %bb.1: 1769; X86-NOBMI-NEXT: movl %eax, %edi 1770; X86-NOBMI-NEXT: .LBB35_2: 1771; X86-NOBMI-NEXT: movl %edi, %eax 1772; X86-NOBMI-NEXT: shrl %cl, %eax 1773; X86-NOBMI-NEXT: xorl %ebx, %ebx 1774; X86-NOBMI-NEXT: testb $32, %cl 1775; X86-NOBMI-NEXT: movl $0, %edx 1776; X86-NOBMI-NEXT: jne .LBB35_4 1777; X86-NOBMI-NEXT: # %bb.3: 1778; X86-NOBMI-NEXT: movl %esi, %ebx 1779; X86-NOBMI-NEXT: movl %eax, %edx 1780; X86-NOBMI-NEXT: .LBB35_4: 1781; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 1782; X86-NOBMI-NEXT: testb $32, %cl 1783; X86-NOBMI-NEXT: jne .LBB35_6 1784; X86-NOBMI-NEXT: # %bb.5: 1785; X86-NOBMI-NEXT: movl %ebx, %eax 1786; X86-NOBMI-NEXT: .LBB35_6: 1787; X86-NOBMI-NEXT: popl %esi 1788; X86-NOBMI-NEXT: popl %edi 1789; X86-NOBMI-NEXT: popl %ebx 1790; X86-NOBMI-NEXT: retl 1791; 1792; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext: 1793; X86-BMI1BMI2: # %bb.0: 1794; X86-BMI1BMI2-NEXT: pushl %edi 1795; X86-BMI1BMI2-NEXT: pushl %esi 1796; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1797; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 1798; X86-BMI1BMI2-NEXT: movb $64, %cl 1799; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 1800; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi 1801; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi 1802; X86-BMI1BMI2-NEXT: xorl %edx, %edx 1803; X86-BMI1BMI2-NEXT: testb $32, %cl 1804; X86-BMI1BMI2-NEXT: je .LBB35_2 1805; X86-BMI1BMI2-NEXT: # %bb.1: 1806; X86-BMI1BMI2-NEXT: movl %edi, %esi 1807; X86-BMI1BMI2-NEXT: movl $0, %edi 1808; X86-BMI1BMI2-NEXT: .LBB35_2: 1809; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax 1810; X86-BMI1BMI2-NEXT: jne .LBB35_4 1811; X86-BMI1BMI2-NEXT: # %bb.3: 1812; X86-BMI1BMI2-NEXT: movl %eax, %edx 1813; X86-BMI1BMI2-NEXT: .LBB35_4: 1814; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi 1815; X86-BMI1BMI2-NEXT: testb $32, %cl 1816; X86-BMI1BMI2-NEXT: jne .LBB35_6 1817; X86-BMI1BMI2-NEXT: # %bb.5: 1818; X86-BMI1BMI2-NEXT: movl %edi, %eax 1819; 
X86-BMI1BMI2-NEXT: .LBB35_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}

; Loads an i64 from %w, then shifts it left and logically right by
; (64 - %numlowbits); the shift pair clears the high (64 - %numlowbits) bits.
; The bit count is a full-width i64 here, so no extension of it is needed.
define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edx
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: movl $64, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB36_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB36_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB36_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB36_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB36_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB36_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl (%eax), %edx
; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
; X86-BMI1BMI2-NEXT: movl $64, %ecx
; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB36_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB36_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB36_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB36_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB36_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB36_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d2_load:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ret i64 %masked
}

; Loads an i64 from %w and applies a shl / lshr pair by (64 - %numlowbits).
; The bit count is an i8, so the subtraction is done in i8 and the result is
; zero-extended to i64 before it is used as the shift amount.
define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edx
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB37_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: .LBB37_2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: movl $0, %edx
; X86-NOBMI-NEXT: jne .LBB37_4
; X86-NOBMI-NEXT: # %bb.3:
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: movl %eax, %edx
; X86-NOBMI-NEXT: .LBB37_4:
; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB37_6
; X86-NOBMI-NEXT: # %bb.5:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: .LBB37_6:
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl (%eax), %edx
; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB37_2
; X86-BMI1BMI2-NEXT: # %bb.1:
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: movl $0, %edi
; X86-BMI1BMI2-NEXT: .LBB37_2:
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
; X86-BMI1BMI2-NEXT: jne .LBB37_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %eax, %edx
; X86-BMI1BMI2-NEXT: .LBB37_4:
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: jne .LBB37_6
; X86-BMI1BMI2-NEXT: # %bb.5:
; X86-BMI1BMI2-NEXT: movl %edi, %eax
; X86-BMI1BMI2-NEXT: .LBB37_6:
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
  %val = load i64, i64* %w
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Constant mask
; ---------------------------------------------------------------------------- ;

; 32-bit

; and of an i32 argument with 2147483647 (0x7FFFFFFF, the low 31 bits set).
define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask32:
; X86: # %bb.0:
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask32:
; X64: # %bb.0:
; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %masked = and i32 %val, 2147483647
  ret i32 %masked
}

; and of an i32 loaded from %val with 2147483647 (0x7FFFFFFF, low 31 bits set).
define i32 @bzhi32_constant_mask32_load(i32* %val) nounwind {
; X86-LABEL: bzhi32_constant_mask32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask32_load:
; X64: # %bb.0:
; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: andl (%rdi), %eax
; X64-NEXT: retq
  %val1 = load i32, i32* %val
  %masked = and i32 %val1, 2147483647
  ret i32 %masked
}

; and of an i32 argument with 32767 (0x7FFF, the low 15 bits set).
define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask16:
; X86: # %bb.0:
; X86-NEXT: movl $32767, %eax # imm = 0x7FFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask16:
; X64: # %bb.0:
; X64-NEXT: andl $32767, %edi # imm = 0x7FFF
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %masked = and i32 %val, 32767
  ret i32 %masked
}

; and of an i32 loaded from %val with 32767 (0x7FFF, the low 15 bits set).
define i32 @bzhi32_constant_mask16_load(i32* %val) nounwind {
; X86-LABEL: bzhi32_constant_mask16_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32767, %eax # imm = 0x7FFF
; X86-NEXT: andl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask16_load:
; X64: # %bb.0:
; X64-NEXT: movl $32767, %eax # imm = 0x7FFF
; X64-NEXT: andl (%rdi), %eax
; X64-NEXT: retq
  %val1 = load i32, i32* %val
  %masked = and i32 %val1, 32767
  ret i32 %masked
}

; and of an i32 argument with 127 (0x7F, the low 7 bits set).
define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask8:
; X64: # %bb.0:
; X64-NEXT: andl $127, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
  %masked = and i32 %val, 127
  ret i32 %masked
}

; and of an i32 loaded from %val with 127 (0x7F, the low 7 bits set).
define i32 @bzhi32_constant_mask8_load(i32* %val) nounwind {
; X86-LABEL: bzhi32_constant_mask8_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: retl
;
; X64-LABEL: bzhi32_constant_mask8_load:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: andl $127, %eax
; X64-NEXT: retq
  %val1 = load i32, i32* %val
  %masked = and i32 %val1, 127
  ret i32 %masked
}

; 64-bit

; and of an i64 argument with 4611686018427387903 (0x3FFFFFFFFFFFFFFF, the low
; 62 bits set). This mask does not fit a 32-bit immediate.
define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_constant_mask64:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
; X64-BMI1TBM: # %bb.0:
; X64-BMI1TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00
; X64-BMI1TBM-NEXT: retq
;
; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64:
; X64-BMI1NOTBMBMI2: # %bb.0:
; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al
; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, %rdi, %rax
; X64-BMI1NOTBMBMI2-NEXT: retq
  %masked = and i64 %val, 4611686018427387903
  ret i64 %masked
}

; and of an i64 loaded from %val with 4611686018427387903
; (0x3FFFFFFFFFFFFFFF, the low 62 bits set).
define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind {
; X86-LABEL: bzhi64_constant_mask64_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF
; X86-NEXT: andl 4(%ecx), %edx
; X86-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NOBMI-NEXT: andq (%rdi), %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
; X64-BMI1TBM: # %bb.0:
; X64-BMI1TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00
; X64-BMI1TBM-NEXT: retq
;
; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load:
; X64-BMI1NOTBMBMI2: # %bb.0:
; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al
; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, (%rdi), %rax
; X64-BMI1NOTBMBMI2-NEXT: retq
  %val1 = load i64, i64* %val
  %masked = and i64 %val1, 4611686018427387903
  ret i64 %masked
}

; and of an i64 argument with 2147483647 (0x7FFFFFFF, the low 31 bits set), so
; bits 31 and above of the result are always zero.
define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask32:
; X86: # %bb.0:
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask32:
; X64: # %bb.0:
; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %masked = and i64 %val, 2147483647
  ret i64 %masked
}

; and of an i64 loaded from %val with 2147483647 (0x7FFFFFFF, low 31 bits set).
define i64 @bzhi64_constant_mask32_load(i64* %val) nounwind {
; X86-LABEL: bzhi64_constant_mask32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: andl (%ecx), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask32_load:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: retq
  %val1 = load i64, i64* %val
  %masked = and i64 %val1, 2147483647
  ret i64 %masked
}

; and of an i64 argument with 32767 (0x7FFF, the low 15 bits set).
define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask16:
; X86: # %bb.0:
; X86-NEXT: movl $32767, %eax # imm = 0x7FFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask16:
; X64: # %bb.0:
; X64-NEXT: andl $32767, %edi # imm = 0x7FFF
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %masked = and i64 %val, 32767
  ret i64 %masked
}

; and of an i64 loaded from %val with 32767 (0x7FFF, the low 15 bits set).
define i64 @bzhi64_constant_mask16_load(i64* %val) nounwind {
; X86-LABEL: bzhi64_constant_mask16_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32767, %eax # imm = 0x7FFF
; X86-NEXT: andl (%ecx), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask16_load:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: retq
  %val1 = load i64, i64* %val
  %masked = and i64 %val1, 32767
  ret i64 %masked
}

; and of an i64 argument with 127 (0x7F, the low 7 bits set).
define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask8:
; X64: # %bb.0:
; X64-NEXT: andl $127, %edi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %masked = and i64 %val, 127
  ret i64 %masked
}

; and of an i64 loaded from %val with 127 (0x7F, the low 7 bits set).
define i64 @bzhi64_constant_mask8_load(i64* %val) nounwind {
; X86-LABEL: bzhi64_constant_mask8_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: bzhi64_constant_mask8_load:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: andl $127, %eax
; X64-NEXT: retq
  %val1 = load i64, i64* %val
  %masked = and i64 %val1, 127
  ret i64 %masked
}