; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s

define i8 @v_lshr_i8(i8 %value, i8 %amount) {
; GFX6-LABEL: v_lshr_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_movk_i32 s4, 0xff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i8 %value, %amount
  ret i8 %result
}

define i8 @v_lshr_i8_7(i8 %value) {
; GFX6-LABEL: v_lshr_i8_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 7, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i8_7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 7
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i8_7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 7
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i8 %value, 7
  ret i8 %result
}

define amdgpu_ps i8 @s_lshr_i8(i8 inreg %value, i8 inreg %amount) {
; GFX6-LABEL: s_lshr_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_movk_i32 s2, 0xff
; GFX6-NEXT:    s_and_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_movk_i32 s2, 0xff
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s1, s1, s2
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_movk_i32 s2, 0xff
; GFX9-NEXT:    s_and_b32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s1, s1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i8 %value, %amount
  ret i8 %result
}

define amdgpu_ps i8 @s_lshr_i8_7(i8 inreg %value) {
; GCN-LABEL: s_lshr_i8_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xff
; GCN-NEXT:    s_lshr_b32 s0, s0, 7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i8 %value, 7
  ret i8 %result
}


define i24 @v_lshr_i24(i24 %value, i24 %amount) {
; GCN-LABEL: v_lshr_i24:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, 0xffffff
; GCN-NEXT:    v_and_b32_e32 v1, s4, v1
; GCN-NEXT:    v_and_b32_e32 v0, s4, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i24 %value, %amount
  ret i24 %result
}

define i24 @v_lshr_i24_7(i24 %value) {
; GCN-LABEL: v_lshr_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 7, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i24 %value, 7
  ret i24 %result
}

define amdgpu_ps i24 @s_lshr_i24(i24 inreg %value, i24 inreg %amount) {
; GCN-LABEL: s_lshr_i24:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s2, 0xffffff
; GCN-NEXT:    s_and_b32 s1, s1, s2
; GCN-NEXT:    s_and_b32 s0, s0, s2
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i24 %value, %amount
  ret i24 %result
}

define amdgpu_ps i24 @s_lshr_i24_7(i24 inreg %value) {
; GCN-LABEL: s_lshr_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xffffff
; GCN-NEXT:    s_lshr_b32 s0, s0, 7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i24 %value, 7
  ret i24 %result
}

define i32 @v_lshr_i32(i32 %value, i32 %amount) {
; GCN-LABEL: v_lshr_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i32 %value, %amount
  ret i32 %result
}

define i32 @v_lshr_i32_31(i32 %value) {
; GCN-LABEL: v_lshr_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i32 %value, 31
  ret i32 %result
}

define amdgpu_ps i32 @s_lshr_i32(i32 inreg %value, i32 inreg %amount) {
; GCN-LABEL: s_lshr_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  ret i32 %result
}

define amdgpu_ps i32 @s_lshr_i32_31(i32 inreg %value) {
; GCN-LABEL: s_lshr_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, 31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, 31
  ret i32 %result
}

define amdgpu_ps float @lshr_i32_sv(i32 inreg %value, i32 %amount) {
; GFX6-LABEL: lshr_i32_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i32_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i32_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

define amdgpu_ps float @lshr_i32_vs(i32 %value, i32 inreg %amount) {
; GCN-LABEL: lshr_i32_vs:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

define <2 x i32> @v_lshr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
; GCN-LABEL: v_lshr_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v2, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <2 x i32> @v_lshr_v2i32_31(<2 x i32> %value) {
; GCN-LABEL: v_lshr_v2i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, 31, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i32> %value, <i32 31, i32 31>
  ret <2 x i32> %result
}

define amdgpu_ps <2 x i32> @s_lshr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s2
; GCN-NEXT:    s_lshr_b32 s1, s1, s3
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <3 x i32> @v_lshr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
; GCN-LABEL: v_lshr_v3i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v3, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define amdgpu_ps <3 x i32> @s_lshr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v3i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s3
; GCN-NEXT:    s_lshr_b32 s1, s1, s4
; GCN-NEXT:    s_lshr_b32 s2, s2, s5
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define <4 x i32> @v_lshr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
; GCN-LABEL: v_lshr_v4i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v4, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v5, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define amdgpu_ps <4 x i32> @s_lshr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v4i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s4
; GCN-NEXT:    s_lshr_b32 s1, s1, s5
; GCN-NEXT:    s_lshr_b32 s2, s2, s6
; GCN-NEXT:    s_lshr_b32 s3, s3, s7
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define <5 x i32> @v_lshr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
; GCN-LABEL: v_lshr_v5i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v5, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v6, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v7, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v8, v3
; GCN-NEXT:    v_lshrrev_b32_e32 v4, v9, v4
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <5 x i32> %value, %amount
  ret <5 x i32> %result
}

define amdgpu_ps <5 x i32> @s_lshr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v5i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s5
; GCN-NEXT:    s_lshr_b32 s1, s1, s6
; GCN-NEXT:    s_lshr_b32 s2, s2, s7
; GCN-NEXT:    s_lshr_b32 s3, s3, s8
; GCN-NEXT:    s_lshr_b32 s4, s4, s9
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <5 x i32> %value, %amount
  ret <5 x i32> %result
}

define <16 x i32> @v_lshr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
; GCN-LABEL: v_lshr_v16i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, v16, v0
; GCN-NEXT:    v_lshrrev_b32_e32 v1, v17, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, v18, v2
; GCN-NEXT:    v_lshrrev_b32_e32 v3, v19, v3
; GCN-NEXT:    v_lshrrev_b32_e32 v4, v20, v4
; GCN-NEXT:    v_lshrrev_b32_e32 v5, v21, v5
; GCN-NEXT:    v_lshrrev_b32_e32 v6, v22, v6
; GCN-NEXT:    v_lshrrev_b32_e32 v7, v23, v7
; GCN-NEXT:    v_lshrrev_b32_e32 v8, v24, v8
; GCN-NEXT:    v_lshrrev_b32_e32 v9, v25, v9
; GCN-NEXT:    v_lshrrev_b32_e32 v10, v26, v10
; GCN-NEXT:    v_lshrrev_b32_e32 v11, v27, v11
; GCN-NEXT:    v_lshrrev_b32_e32 v12, v28, v12
; GCN-NEXT:    v_lshrrev_b32_e32 v13, v29, v13
; GCN-NEXT:    v_lshrrev_b32_e32 v14, v30, v14
; GCN-NEXT:    v_lshrrev_b32_e32 v15, v31, v15
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <16 x i32> %value, %amount
  ret <16 x i32> %result
}

define amdgpu_ps <16 x i32> @s_lshr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
; GCN-LABEL: s_lshr_v16i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, s16
; GCN-NEXT:    s_lshr_b32 s1, s1, s17
; GCN-NEXT:    s_lshr_b32 s2, s2, s18
; GCN-NEXT:    s_lshr_b32 s3, s3, s19
; GCN-NEXT:    s_lshr_b32 s4, s4, s20
; GCN-NEXT:    s_lshr_b32 s5, s5, s21
; GCN-NEXT:    s_lshr_b32 s6, s6, s22
; GCN-NEXT:    s_lshr_b32 s7, s7, s23
; GCN-NEXT:    s_lshr_b32 s8, s8, s24
; GCN-NEXT:    s_lshr_b32 s9, s9, s25
; GCN-NEXT:    s_lshr_b32 s10, s10, s26
; GCN-NEXT:    s_lshr_b32 s11, s11, s27
; GCN-NEXT:    s_lshr_b32 s12, s12, s28
; GCN-NEXT:    s_lshr_b32 s13, s13, s29
; GCN-NEXT:    s_lshr_b32 s14, s14, s30
; GCN-NEXT:    s_lshr_b32 s15, s15, s31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <16 x i32> %value, %amount
  ret <16 x i32> %result
}

define i16 @v_lshr_i16(i16 %value, i16 %amount) {
; GFX6-LABEL: v_lshr_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i16 %value, %amount
  ret i16 %result
}

define i16 @v_lshr_i16_31(i16 %value) {
; GCN-LABEL: v_lshr_i16_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i16 %value, 31
  ret i16 %result
}

define amdgpu_ps i16 @s_lshr_i16(i16 inreg %value, i16 inreg %amount) {
; GFX6-LABEL: s_lshr_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    s_and_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_and_b32 s1, s1, s2
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, 0xffff
; GFX9-NEXT:    s_and_b32 s0, s0, s2
; GFX9-NEXT:    s_and_b32 s1, s1, s2
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  ret i16 %result
}

define amdgpu_ps i16 @s_lshr_i16_15(i16 inreg %value) {
; GCN-LABEL: s_lshr_i16_15:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, 0xffff
; GCN-NEXT:    s_lshr_b32 s0, s0, 15
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, 15
  ret i16 %result
}

define amdgpu_ps half @lshr_i16_sv(i16 inreg %value, i16 %amount) {
; GFX6-LABEL: lshr_i16_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s1, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s1, v0
; GFX6-NEXT:    s_and_b32 s0, s0, s1
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

define amdgpu_ps half @lshr_i16_vs(i16 %value, i16 inreg %amount) {
; GFX6-LABEL: lshr_i16_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s1, 0xffff
; GFX6-NEXT:    s_and_b32 s0, s0, s1
; GFX6-NEXT:    v_and_b32_e32 v0, s1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i16_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i16_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

define <2 x i16> @v_lshr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
; GFX6-LABEL: v_lshr_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v2, v0
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v1, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i16> %value, %amount
  ret <2 x i16> %result
}

define <2 x i16> @v_lshr_v2i16_15(<2 x i16> %value) {
; GFX6-LABEL: v_lshr_v2i16_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 15, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 15, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i16_15:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 15
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i16_15:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i16> %value, <i16 15, i16 15>
  ret <2 x i16> %result
}

define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s2, s4
; GFX6-NEXT:    s_and_b32 s0, s0, s4
; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
; GFX6-NEXT:    s_and_b32 s2, s3, s4
; GFX6-NEXT:    s_and_b32 s1, s1, s4
; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
; GFX8-NEXT:    s_lshr_b32 s1, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
; GFX9-NEXT:    s_lshr_b32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to i32
  ret i32 %cast
}

define amdgpu_ps float @lshr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
; GFX6-LABEL: lshr_v2i16_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
; GFX6-NEXT:    s_and_b32 s0, s1, s2
; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_v2i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v0, s0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_v2i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

define amdgpu_ps float @lshr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: lshr_v2i16_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
; GFX6-NEXT:    s_and_b32 s0, s0, s2
; GFX6-NEXT:    v_and_b32_e32 v0, s2, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX6-NEXT:    s_and_b32 s0, s1, s2
; GFX6-NEXT:    v_and_b32_e32 v1, s2, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_v2i16_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, s0, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_v2i16_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

; FIXME
; define <3 x i16> @v_lshr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
;   %result = lshr <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

; define amdgpu_ps <3 x i16> @s_lshr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
;   %result = lshr <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

define <2 x float> @v_lshr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-LABEL: v_lshr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v4, v0
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v5
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v6
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v2, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v3, v1
; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v2, v0
; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s8, 0xffff
; GFX6-NEXT:    s_and_b32 s4, s4, s8
; GFX6-NEXT:    s_and_b32 s0, s0, s8
; GFX6-NEXT:    s_lshr_b32 s0, s0, s4
; GFX6-NEXT:    s_and_b32 s4, s5, s8
; GFX6-NEXT:    s_and_b32 s1, s1, s8
; GFX6-NEXT:    s_lshr_b32 s1, s1, s4
; GFX6-NEXT:    s_and_b32 s4, s6, s8
; GFX6-NEXT:    s_and_b32 s2, s2, s8
; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
; GFX6-NEXT:    s_and_b32 s4, s7, s8
; GFX6-NEXT:    s_and_b32 s3, s3, s8
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
; GFX6-NEXT:    s_or_b32 s1, s2, s1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s6, 0xffff
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s6
; GFX8-NEXT:    s_and_b32 s2, s2, s6
; GFX8-NEXT:    s_lshr_b32 s0, s0, s2
; GFX8-NEXT:    s_lshr_b32 s2, s4, s7
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s6
; GFX8-NEXT:    s_and_b32 s3, s3, s6
; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
; GFX8-NEXT:    s_lshr_b32 s3, s5, s8
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s6
; GFX8-NEXT:    s_or_b32 s0, s2, s0
; GFX8-NEXT:    s_lshl_b32 s2, s3, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s6
; GFX8-NEXT:    s_or_b32 s1, s2, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s2
; GFX9-NEXT:    s_lshr_b32 s2, s4, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x i32>
  ret <2 x i32> %cast
}

; FIXME
; define <5 x i16> @v_lshr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
;   %result = lshr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

; define amdgpu_ps <5 x i16> @s_lshr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
;   %result = lshr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

; define <3 x float> @v_lshr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
;   %result = lshr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x float>
;   ret <3 x float> %cast
; }

; define amdgpu_ps <3 x i32> @s_lshr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
;   %result = lshr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x i32>
;   ret <3 x i32> %cast
; }

define <4 x float> @v_lshr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
; GFX6-LABEL: v_lshr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v8
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, v8, v0
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v9
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v8, v1
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v10
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v8, v2
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v11
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v8, v3
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v12
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v13
; GFX6-NEXT:    v_and_b32_e32 v5, s4, v5
; GFX6-NEXT:    v_mov_b32_e32 v16, 0xffff
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, v8, v5
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v14
; GFX6-NEXT:    v_and_b32_e32 v6, s4, v6
; GFX6-NEXT:    v_lshrrev_b32_e32 v6, v8, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v8, v15, v16
; GFX6-NEXT:    v_and_b32_e32 v7, v7, v16
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT:    v_lshrrev_b32_e32 v7, v8, v7
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v8, v4, v0
; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v5, v1
; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v6, v2
; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v3
; GFX8-NEXT:    v_lshrrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v4, v0
; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v5, v1
; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
; GFX9-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x float>
  ret <4 x float> %cast
}

define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
; GFX6-LABEL: s_lshr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s16, 0xffff
; GFX6-NEXT:    s_and_b32 s8, s8, s16
; GFX6-NEXT:    s_and_b32 s0, s0, s16
; GFX6-NEXT:    s_lshr_b32 s0, s0, s8
; GFX6-NEXT:    s_and_b32 s8, s9, s16
; GFX6-NEXT:    s_and_b32 s1, s1, s16
; GFX6-NEXT:    s_lshr_b32 s1, s1, s8
; GFX6-NEXT:    s_and_b32 s8, s10, s16
; GFX6-NEXT:    s_and_b32 s2, s2, s16
; GFX6-NEXT:    s_lshr_b32 s2, s2, s8
; GFX6-NEXT:    s_and_b32 s8, s11, s16
; GFX6-NEXT:    s_and_b32 s3, s3, s16
; GFX6-NEXT:    s_lshr_b32 s3, s3, s8
; GFX6-NEXT:    s_and_b32 s8, s12, s16
; GFX6-NEXT:    s_and_b32 s4, s4, s16
; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
; GFX6-NEXT:    s_and_b32 s8, s13, s16
; GFX6-NEXT:    s_and_b32 s5, s5, s16
; GFX6-NEXT:    s_lshr_b32 s5, s5, s8
; GFX6-NEXT:    s_and_b32 s8, s14, s16
; GFX6-NEXT:    s_and_b32 s6, s6, s16
; GFX6-NEXT:    s_lshr_b32 s6, s6, s8
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_and_b32 s8, s15, s16
; GFX6-NEXT:    s_and_b32 s7, s7, s16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
; GFX6-NEXT:    s_lshr_b32 s7, s7, s8
; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
; GFX6-NEXT:    s_or_b32 s1, s2, s1
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
; GFX6-NEXT:    s_or_b32 s2, s4, s2
; GFX6-NEXT:    s_or_b32 s3, s6, s3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_lshr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s12, 0xffff
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
; GFX8-NEXT:    s_lshr_b32 s13, s4, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s12
; GFX8-NEXT:    s_and_b32 s4, s4, s12
; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
; GFX8-NEXT:    s_lshr_b32 s4, s8, s13
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s12
; GFX8-NEXT:    s_and_b32 s5, s5, s12
; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
; GFX8-NEXT:    s_and_b32 s2, s2, s12
; GFX8-NEXT:    s_and_b32 s6, s6, s12
; GFX8-NEXT:    s_lshr_b32 s5, s9, s14
; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s12
; GFX8-NEXT:    s_lshr_b32 s2, s2, s6
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
; GFX8-NEXT:    s_or_b32 s0, s4, s0
; GFX8-NEXT:    s_and_b32 s3, s3, s12
; GFX8-NEXT:    s_and_b32 s7, s7, s12
; GFX8-NEXT:    s_lshr_b32 s6, s10, s15
; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
; GFX8-NEXT:    s_and_b32 s1, s1, s12
; GFX8-NEXT:    s_lshr_b32 s3, s3, s7
; GFX8-NEXT:    s_or_b32 s1, s4, s1
; GFX8-NEXT:    s_lshr_b32 s7, s11, s16
; GFX8-NEXT:    s_lshl_b32 s4, s6, 16
; GFX8-NEXT:    s_and_b32 s2, s2, s12
; GFX8-NEXT:    s_or_b32 s2, s4, s2
; GFX8-NEXT:    s_lshl_b32 s4, s7, 16
; GFX8-NEXT:    s_and_b32 s3, s3, s12
; GFX8-NEXT:    s_or_b32 s3, s4, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_lshr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s8, s0, 16
; GFX9-NEXT:    s_lshr_b32 s9, s4, 16
; GFX9-NEXT:    s_lshr_b32 s0, s0, s4
; GFX9-NEXT:    s_lshr_b32 s4, s8, s9
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
; GFX9-NEXT:    s_lshr_b32 s1, s1, s5
; GFX9-NEXT:    s_lshr_b32 s4, s4, s8
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
; GFX9-NEXT:    s_lshr_b32 s5, s6, 16
; GFX9-NEXT:    s_lshr_b32 s4, s4, s5
; GFX9-NEXT:    s_lshr_b32 s2, s2, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
; GFX9-NEXT:    s_lshr_b32 s5, s7, 16
; GFX9-NEXT:    s_lshr_b32 s3, s3, s7
; GFX9-NEXT:    s_lshr_b32 s4, s4, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s4
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x i32>
  ret <4 x i32> %cast
}

define i64 @v_lshr_i64(i64 %value, i64 %amount) {
; GFX6-LABEL: v_lshr_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, %amount
  ret i64 %result
}

define i64 @v_lshr_i64_63(i64 %value) {
; GCN-LABEL: v_lshr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 63
  ret i64 %result
}

define i64 @v_lshr_i64_33(i64 %value) {
; GCN-LABEL: v_lshr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 33
  ret i64 %result
}

define i64 @v_lshr_i64_32(i64 %value) {
; GCN-LABEL: v_lshr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 32
  ret i64 %result
}

define i64 @v_lshr_i64_31(i64 %value) {
; GFX6-LABEL: v_lshr_i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr i64 %value, 31
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64(i64 inreg %value, i64 inreg %amount) {
; GCN-LABEL: s_lshr_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_63(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s1, 31
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 63
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_33(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s1, 1
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 33
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_32(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 32
  ret i64 %result
}

define amdgpu_ps i64 @s_lshr_i64_31(i64 inreg %value) {
; GCN-LABEL: s_lshr_i64_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], 31
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, 31
  ret i64 %result
}

define amdgpu_ps <2 x float> @lshr_i64_sv(i64 inreg %value, i64 %amount) {
; GFX6-LABEL: lshr_i64_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b64 v[0:1], s[0:1], v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i64_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i64_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @lshr_i64_vs(i64 %value, i64 inreg %amount) {
; GFX6-LABEL: lshr_i64_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: lshr_i64_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s0, v[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: lshr_i64_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s0, v[0:1]
; GFX9-NEXT:    ; return to shader part epilog
  %result = lshr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

define <2 x i64> @v_lshr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_lshr_v2i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], v4
; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v6
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v4, v[0:1]
; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v6, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v4, v[0:1]
; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v6, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i64> %value, %amount
  ret <2 x i64> %result
}

define <2 x i64> @v_lshr_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_lshr_v2i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 31
; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_v2i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 31, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_v2i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 31, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = lshr <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %result
}

define amdgpu_ps <2 x i64> @s_lshr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
; GCN-LABEL: s_lshr_v2i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
; GCN-NEXT:    ; return to shader part epilog
  %result = lshr <2 x i64> %value, %amount
  ret <2 x i64> %result
}