; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests, and tests with loads dropped.

; Patterns:
;   a) x & (1 << nbits) - 1
;   b) x & ~(-1 << nbits)
;   c) x & (-1 >> (32 - y))
;   d) x << (32 - y) >> (32 - y)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern a with an i32 bit count: mask = (1 << %numlowbits) - 1, with the mask
; as the first 'and' operand. The expected output is a single BFE_UINT.
define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern a with the bit count passed as a zeroext i8 argument and widened to
; i32 with zext before the shift. The expected output reads the byte with
; VTX_READ_8 and still ends in a BFE_UINT.
define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_a1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern a with the 'and' operands swapped (value first, mask second). The
; expected output is the same single BFE_UINT as for bzhi32_a0.
define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern b with an i32 bit count: mask = ~(-1 << %numlowbits), with the mask
; as the first 'and' operand. The expected output is a single BFE_UINT.
define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern b with the bit count passed as a zeroext i8 argument and widened to
; i32 with zext before the shift. The expected output reads the byte with
; VTX_READ_8 and still ends in a BFE_UINT.
define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_b1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern b with the 'and' operands swapped (value first, mask second). The
; expected output is the same single BFE_UINT as for bzhi32_b0.
define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern c with an i32 bit count: mask = -1 >> (32 - %numlowbits), with the
; mask as the first 'and' operand. The expected output is a single BFE_UINT.
define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern c with an i8 bit count: the subtraction from 32 is done in i8 and the
; result is zero-extended afterwards. The expected output keeps the explicit
; SUB_INT / AND_INT 255 / LSHR / AND_INT sequence; no BFE_UINT appears.
define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR * T0.W, literal.x, PV.W,
; EG-NEXT:    -1(nan), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.X, PV.W, KC0[2].Y,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_c1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT:    LSHR * T0.W, literal.x, PV.W,
; CM-NEXT:    -1(nan), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.X, PV.W, KC0[2].Y,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern c with the 'and' operands swapped (value first, mask second). The
; expected output is the same single BFE_UINT as for bzhi32_c0.
define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;

; Pattern d with an i32 bit count: shift %val left by (32 - %numlowbits), then
; logically shift it right by the same amount. The expected output is a single
; BFE_UINT.
define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_d0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern d with an i8 bit count: the subtraction from 32 is done in i8 and the
; result is zero-extended afterwards. The expected output keeps the explicit
; SUB_INT / AND_INT 255 / LSHL / LSHR sequence; no BFE_UINT appears.
define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    LSHL * T1.W, KC0[2].Y, PV.W,
; EG-NEXT:    LSHR T0.X, PV.W, T0.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_d1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT:    LSHL * T1.W, KC0[2].Y, PV.W,
; CM-NEXT:    LSHR * T0.X, PV.W, T0.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}