1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 5 6define i8 @v_ashr_i8(i8 %value, i8 %amount) { 7; GFX6-LABEL: v_ashr_i8: 8; GFX6: ; %bb.0: 9; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 11; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 12; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0 13; GFX6-NEXT: s_setpc_b64 s[30:31] 14; 15; GFX8-LABEL: v_ashr_i8: 16; GFX8: ; %bb.0: 17; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 19; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1 20; GFX8-NEXT: s_setpc_b64 s[30:31] 21; 22; GFX9-LABEL: v_ashr_i8: 23; GFX9: ; %bb.0: 24; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 26; GFX9-NEXT: s_setpc_b64 s[30:31] 27 %result = ashr i8 %value, %amount 28 ret i8 %result 29} 30 31define i8 @v_ashr_i8_7(i8 %value) { 32; GFX6-LABEL: v_ashr_i8_7: 33; GFX6: ; %bb.0: 34; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 35; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 36; GFX6-NEXT: v_ashrrev_i32_e32 v0, 7, v0 37; GFX6-NEXT: s_setpc_b64 s[30:31] 38; 39; GFX8-LABEL: v_ashr_i8_7: 40; GFX8: ; %bb.0: 41; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 42; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 43; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0 44; GFX8-NEXT: s_setpc_b64 s[30:31] 45; 46; GFX9-LABEL: v_ashr_i8_7: 47; GFX9: ; %bb.0: 48; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 49; GFX9-NEXT: v_mov_b32_e32 v1, 7 50; GFX9-NEXT: 
v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 51; GFX9-NEXT: s_setpc_b64 s[30:31] 52 %result = ashr i8 %value, 7 53 ret i8 %result 54} 55 56define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) { 57; GFX6-LABEL: s_ashr_i8: 58; GFX6: ; %bb.0: 59; GFX6-NEXT: s_and_b32 s1, s1, 0xff 60; GFX6-NEXT: s_sext_i32_i8 s0, s0 61; GFX6-NEXT: s_ashr_i32 s0, s0, s1 62; GFX6-NEXT: ; return to shader part epilog 63; 64; GFX8-LABEL: s_ashr_i8: 65; GFX8: ; %bb.0: 66; GFX8-NEXT: s_sext_i32_i8 s0, s0 67; GFX8-NEXT: s_sext_i32_i8 s1, s1 68; GFX8-NEXT: s_ashr_i32 s0, s0, s1 69; GFX8-NEXT: ; return to shader part epilog 70; 71; GFX9-LABEL: s_ashr_i8: 72; GFX9: ; %bb.0: 73; GFX9-NEXT: s_sext_i32_i8 s0, s0 74; GFX9-NEXT: s_sext_i32_i8 s1, s1 75; GFX9-NEXT: s_ashr_i32 s0, s0, s1 76; GFX9-NEXT: ; return to shader part epilog 77 %result = ashr i8 %value, %amount 78 ret i8 %result 79} 80 81define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) { 82; GCN-LABEL: s_ashr_i8_7: 83; GCN: ; %bb.0: 84; GCN-NEXT: s_sext_i32_i8 s0, s0 85; GCN-NEXT: s_ashr_i32 s0, s0, 7 86; GCN-NEXT: ; return to shader part epilog 87 %result = ashr i8 %value, 7 88 ret i8 %result 89} 90 91 92define i24 @v_ashr_i24(i24 %value, i24 %amount) { 93; GCN-LABEL: v_ashr_i24: 94; GCN: ; %bb.0: 95; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 96; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1 97; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 98; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0 99; GCN-NEXT: s_setpc_b64 s[30:31] 100 %result = ashr i24 %value, %amount 101 ret i24 %result 102} 103 104define i24 @v_ashr_i24_7(i24 %value) { 105; GCN-LABEL: v_ashr_i24_7: 106; GCN: ; %bb.0: 107; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 108; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 109; GCN-NEXT: v_ashrrev_i32_e32 v0, 7, v0 110; GCN-NEXT: s_setpc_b64 s[30:31] 111 %result = ashr i24 %value, 7 112 ret i24 %result 113} 114 115define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) { 116; 
GCN-LABEL: s_ashr_i24: 117; GCN: ; %bb.0: 118; GCN-NEXT: s_and_b32 s1, s1, 0xffffff 119; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 120; GCN-NEXT: s_ashr_i32 s0, s0, s1 121; GCN-NEXT: ; return to shader part epilog 122 %result = ashr i24 %value, %amount 123 ret i24 %result 124} 125 126define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) { 127; GCN-LABEL: s_ashr_i24_7: 128; GCN: ; %bb.0: 129; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 130; GCN-NEXT: s_ashr_i32 s0, s0, 7 131; GCN-NEXT: ; return to shader part epilog 132 %result = ashr i24 %value, 7 133 ret i24 %result 134} 135 136define i32 @v_ashr_i32(i32 %value, i32 %amount) { 137; GCN-LABEL: v_ashr_i32: 138; GCN: ; %bb.0: 139; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 140; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0 141; GCN-NEXT: s_setpc_b64 s[30:31] 142 %result = ashr i32 %value, %amount 143 ret i32 %result 144} 145 146define i32 @v_ashr_i32_31(i32 %value) { 147; GCN-LABEL: v_ashr_i32_31: 148; GCN: ; %bb.0: 149; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 150; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0 151; GCN-NEXT: s_setpc_b64 s[30:31] 152 %result = ashr i32 %value, 31 153 ret i32 %result 154} 155 156define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) { 157; GCN-LABEL: s_ashr_i32: 158; GCN: ; %bb.0: 159; GCN-NEXT: s_ashr_i32 s0, s0, s1 160; GCN-NEXT: ; return to shader part epilog 161 %result = ashr i32 %value, %amount 162 ret i32 %result 163} 164 165define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) { 166; GCN-LABEL: s_ashr_i32_31: 167; GCN: ; %bb.0: 168; GCN-NEXT: s_ashr_i32 s0, s0, 31 169; GCN-NEXT: ; return to shader part epilog 170 %result = ashr i32 %value, 31 171 ret i32 %result 172} 173 174define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) { 175; GFX6-LABEL: ashr_i32_sv: 176; GFX6: ; %bb.0: 177; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 178; GFX6-NEXT: ; return to shader part epilog 179; 180; GFX8-LABEL: ashr_i32_sv: 181; GFX8: ; %bb.0: 182; GFX8-NEXT: v_ashrrev_i32_e64 v0, v0, 
s0 183; GFX8-NEXT: ; return to shader part epilog 184; 185; GFX9-LABEL: ashr_i32_sv: 186; GFX9: ; %bb.0: 187; GFX9-NEXT: v_ashrrev_i32_e64 v0, v0, s0 188; GFX9-NEXT: ; return to shader part epilog 189 %result = ashr i32 %value, %amount 190 %cast = bitcast i32 %result to float 191 ret float %cast 192} 193 194define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) { 195; GCN-LABEL: ashr_i32_vs: 196; GCN: ; %bb.0: 197; GCN-NEXT: v_ashrrev_i32_e32 v0, s0, v0 198; GCN-NEXT: ; return to shader part epilog 199 %result = ashr i32 %value, %amount 200 %cast = bitcast i32 %result to float 201 ret float %cast 202} 203 204define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) { 205; GCN-LABEL: v_ashr_v2i32: 206; GCN: ; %bb.0: 207; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 208; GCN-NEXT: v_ashrrev_i32_e32 v0, v2, v0 209; GCN-NEXT: v_ashrrev_i32_e32 v1, v3, v1 210; GCN-NEXT: s_setpc_b64 s[30:31] 211 %result = ashr <2 x i32> %value, %amount 212 ret <2 x i32> %result 213} 214 215define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) { 216; GCN-LABEL: v_ashr_v2i32_31: 217; GCN: ; %bb.0: 218; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 219; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0 220; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v1 221; GCN-NEXT: s_setpc_b64 s[30:31] 222 %result = ashr <2 x i32> %value, <i32 31, i32 31> 223 ret <2 x i32> %result 224} 225 226define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) { 227; GCN-LABEL: s_ashr_v2i32: 228; GCN: ; %bb.0: 229; GCN-NEXT: s_ashr_i32 s0, s0, s2 230; GCN-NEXT: s_ashr_i32 s1, s1, s3 231; GCN-NEXT: ; return to shader part epilog 232 %result = ashr <2 x i32> %value, %amount 233 ret <2 x i32> %result 234} 235 236define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) { 237; GCN-LABEL: v_ashr_v3i32: 238; GCN: ; %bb.0: 239; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 240; GCN-NEXT: v_ashrrev_i32_e32 v0, v3, v0 241; GCN-NEXT: v_ashrrev_i32_e32 v1, v4, v1 242; 
GCN-NEXT: v_ashrrev_i32_e32 v2, v5, v2 243; GCN-NEXT: s_setpc_b64 s[30:31] 244 %result = ashr <3 x i32> %value, %amount 245 ret <3 x i32> %result 246} 247 248define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) { 249; GCN-LABEL: s_ashr_v3i32: 250; GCN: ; %bb.0: 251; GCN-NEXT: s_ashr_i32 s0, s0, s3 252; GCN-NEXT: s_ashr_i32 s1, s1, s4 253; GCN-NEXT: s_ashr_i32 s2, s2, s5 254; GCN-NEXT: ; return to shader part epilog 255 %result = ashr <3 x i32> %value, %amount 256 ret <3 x i32> %result 257} 258 259define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) { 260; GCN-LABEL: v_ashr_v4i32: 261; GCN: ; %bb.0: 262; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GCN-NEXT: v_ashrrev_i32_e32 v0, v4, v0 264; GCN-NEXT: v_ashrrev_i32_e32 v1, v5, v1 265; GCN-NEXT: v_ashrrev_i32_e32 v2, v6, v2 266; GCN-NEXT: v_ashrrev_i32_e32 v3, v7, v3 267; GCN-NEXT: s_setpc_b64 s[30:31] 268 %result = ashr <4 x i32> %value, %amount 269 ret <4 x i32> %result 270} 271 272define amdgpu_ps <4 x i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) { 273; GCN-LABEL: s_ashr_v4i32: 274; GCN: ; %bb.0: 275; GCN-NEXT: s_ashr_i32 s0, s0, s4 276; GCN-NEXT: s_ashr_i32 s1, s1, s5 277; GCN-NEXT: s_ashr_i32 s2, s2, s6 278; GCN-NEXT: s_ashr_i32 s3, s3, s7 279; GCN-NEXT: ; return to shader part epilog 280 %result = ashr <4 x i32> %value, %amount 281 ret <4 x i32> %result 282} 283 284define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) { 285; GCN-LABEL: v_ashr_v5i32: 286; GCN: ; %bb.0: 287; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 288; GCN-NEXT: v_ashrrev_i32_e32 v0, v5, v0 289; GCN-NEXT: v_ashrrev_i32_e32 v1, v6, v1 290; GCN-NEXT: v_ashrrev_i32_e32 v2, v7, v2 291; GCN-NEXT: v_ashrrev_i32_e32 v3, v8, v3 292; GCN-NEXT: v_ashrrev_i32_e32 v4, v9, v4 293; GCN-NEXT: s_setpc_b64 s[30:31] 294 %result = ashr <5 x i32> %value, %amount 295 ret <5 x i32> %result 296} 297 298define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg 
%value, <5 x i32> inreg %amount) { 299; GCN-LABEL: s_ashr_v5i32: 300; GCN: ; %bb.0: 301; GCN-NEXT: s_ashr_i32 s0, s0, s5 302; GCN-NEXT: s_ashr_i32 s1, s1, s6 303; GCN-NEXT: s_ashr_i32 s2, s2, s7 304; GCN-NEXT: s_ashr_i32 s3, s3, s8 305; GCN-NEXT: s_ashr_i32 s4, s4, s9 306; GCN-NEXT: ; return to shader part epilog 307 %result = ashr <5 x i32> %value, %amount 308 ret <5 x i32> %result 309} 310 311define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) { 312; GCN-LABEL: v_ashr_v16i32: 313; GCN: ; %bb.0: 314; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 315; GCN-NEXT: v_ashrrev_i32_e32 v0, v16, v0 316; GCN-NEXT: v_ashrrev_i32_e32 v1, v17, v1 317; GCN-NEXT: v_ashrrev_i32_e32 v2, v18, v2 318; GCN-NEXT: v_ashrrev_i32_e32 v3, v19, v3 319; GCN-NEXT: v_ashrrev_i32_e32 v4, v20, v4 320; GCN-NEXT: v_ashrrev_i32_e32 v5, v21, v5 321; GCN-NEXT: v_ashrrev_i32_e32 v6, v22, v6 322; GCN-NEXT: v_ashrrev_i32_e32 v7, v23, v7 323; GCN-NEXT: v_ashrrev_i32_e32 v8, v24, v8 324; GCN-NEXT: v_ashrrev_i32_e32 v9, v25, v9 325; GCN-NEXT: v_ashrrev_i32_e32 v10, v26, v10 326; GCN-NEXT: v_ashrrev_i32_e32 v11, v27, v11 327; GCN-NEXT: v_ashrrev_i32_e32 v12, v28, v12 328; GCN-NEXT: v_ashrrev_i32_e32 v13, v29, v13 329; GCN-NEXT: v_ashrrev_i32_e32 v14, v30, v14 330; GCN-NEXT: v_ashrrev_i32_e32 v15, v31, v15 331; GCN-NEXT: s_setpc_b64 s[30:31] 332 %result = ashr <16 x i32> %value, %amount 333 ret <16 x i32> %result 334} 335 336define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) { 337; GCN-LABEL: s_ashr_v16i32: 338; GCN: ; %bb.0: 339; GCN-NEXT: s_ashr_i32 s0, s0, s16 340; GCN-NEXT: s_ashr_i32 s1, s1, s17 341; GCN-NEXT: s_ashr_i32 s2, s2, s18 342; GCN-NEXT: s_ashr_i32 s3, s3, s19 343; GCN-NEXT: s_ashr_i32 s4, s4, s20 344; GCN-NEXT: s_ashr_i32 s5, s5, s21 345; GCN-NEXT: s_ashr_i32 s6, s6, s22 346; GCN-NEXT: s_ashr_i32 s7, s7, s23 347; GCN-NEXT: s_ashr_i32 s8, s8, s24 348; GCN-NEXT: s_ashr_i32 s9, s9, s25 349; GCN-NEXT: s_ashr_i32 s10, s10, s26 
350; GCN-NEXT: s_ashr_i32 s11, s11, s27 351; GCN-NEXT: s_ashr_i32 s12, s12, s28 352; GCN-NEXT: s_ashr_i32 s13, s13, s29 353; GCN-NEXT: s_ashr_i32 s14, s14, s30 354; GCN-NEXT: s_ashr_i32 s15, s15, s31 355; GCN-NEXT: ; return to shader part epilog 356 %result = ashr <16 x i32> %value, %amount 357 ret <16 x i32> %result 358} 359 360define i16 @v_ashr_i16(i16 %value, i16 %amount) { 361; GFX6-LABEL: v_ashr_i16: 362; GFX6: ; %bb.0: 363; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 364; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 365; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 366; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0 367; GFX6-NEXT: s_setpc_b64 s[30:31] 368; 369; GFX8-LABEL: v_ashr_i16: 370; GFX8: ; %bb.0: 371; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0 373; GFX8-NEXT: s_setpc_b64 s[30:31] 374; 375; GFX9-LABEL: v_ashr_i16: 376; GFX9: ; %bb.0: 377; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 378; GFX9-NEXT: v_ashrrev_i16_e32 v0, v1, v0 379; GFX9-NEXT: s_setpc_b64 s[30:31] 380 %result = ashr i16 %value, %amount 381 ret i16 %result 382} 383; NOTE(review): v_ashr_i16_31 shifts an i16 by 31, which is >= the 16-bit width, so the ashr result is poison; that is why the GCN checks contain no shift instruction. The scalar sibling is s_ashr_i16_15, so the amount was presumably meant to be 15 - confirm, and regenerate the checks with update_llc_test_checks.py if it is changed. 384define i16 @v_ashr_i16_31(i16 %value) { 385; GCN-LABEL: v_ashr_i16_31: 386; GCN: ; %bb.0: 387; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 388; GCN-NEXT: s_setpc_b64 s[30:31] 389 %result = ashr i16 %value, 31 390 ret i16 %result 391} 392 393define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) { 394; GFX6-LABEL: s_ashr_i16: 395; GFX6: ; %bb.0: 396; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 397; GFX6-NEXT: s_sext_i32_i16 s0, s0 398; GFX6-NEXT: s_ashr_i32 s0, s0, s1 399; GFX6-NEXT: ; return to shader part epilog 400; 401; GFX8-LABEL: s_ashr_i16: 402; GFX8: ; %bb.0: 403; GFX8-NEXT: s_sext_i32_i16 s0, s0 404; GFX8-NEXT: s_sext_i32_i16 s1, s1 405; GFX8-NEXT: s_ashr_i32 s0, s0, s1 406; GFX8-NEXT: ; return to shader part epilog 407; 408; GFX9-LABEL: s_ashr_i16: 409; GFX9: ; %bb.0: 410; GFX9-NEXT: s_sext_i32_i16 s0, s0 411; GFX9-NEXT: s_sext_i32_i16 s1, s1 412; 
GFX9-NEXT: s_ashr_i32 s0, s0, s1 413; GFX9-NEXT: ; return to shader part epilog 414 %result = ashr i16 %value, %amount 415 ret i16 %result 416} 417 418define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) { 419; GCN-LABEL: s_ashr_i16_15: 420; GCN: ; %bb.0: 421; GCN-NEXT: s_sext_i32_i16 s0, s0 422; GCN-NEXT: s_ashr_i32 s0, s0, 15 423; GCN-NEXT: ; return to shader part epilog 424 %result = ashr i16 %value, 15 425 ret i16 %result 426} 427 428define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) { 429; GFX6-LABEL: ashr_i16_sv: 430; GFX6: ; %bb.0: 431; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 432; GFX6-NEXT: s_sext_i32_i16 s0, s0 433; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 434; GFX6-NEXT: ; return to shader part epilog 435; 436; GFX8-LABEL: ashr_i16_sv: 437; GFX8: ; %bb.0: 438; GFX8-NEXT: v_ashrrev_i16_e64 v0, v0, s0 439; GFX8-NEXT: ; return to shader part epilog 440; 441; GFX9-LABEL: ashr_i16_sv: 442; GFX9: ; %bb.0: 443; GFX9-NEXT: v_ashrrev_i16_e64 v0, v0, s0 444; GFX9-NEXT: ; return to shader part epilog 445 %result = ashr i16 %value, %amount 446 %cast = bitcast i16 %result to half 447 ret half %cast 448} 449 450define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) { 451; GFX6-LABEL: ashr_i16_vs: 452; GFX6: ; %bb.0: 453; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 454; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 455; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0 456; GFX6-NEXT: ; return to shader part epilog 457; 458; GFX8-LABEL: ashr_i16_vs: 459; GFX8: ; %bb.0: 460; GFX8-NEXT: v_ashrrev_i16_e32 v0, s0, v0 461; GFX8-NEXT: ; return to shader part epilog 462; 463; GFX9-LABEL: ashr_i16_vs: 464; GFX9: ; %bb.0: 465; GFX9-NEXT: v_ashrrev_i16_e32 v0, s0, v0 466; GFX9-NEXT: ; return to shader part epilog 467 %result = ashr i16 %value, %amount 468 %cast = bitcast i16 %result to half 469 ret half %cast 470} 471 472define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) { 473; GFX6-LABEL: v_ashr_v2i16: 474; GFX6: ; %bb.0: 475; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 476; GFX6-NEXT: s_mov_b32 s4, 0xffff 477; GFX6-NEXT: v_and_b32_e32 v2, s4, v2 478; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 479; GFX6-NEXT: v_ashrrev_i32_e32 v0, v2, v0 480; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 481; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 482; GFX6-NEXT: v_ashrrev_i32_e32 v1, v2, v1 483; GFX6-NEXT: s_setpc_b64 s[30:31] 484; 485; GFX8-LABEL: v_ashr_v2i16: 486; GFX8: ; %bb.0: 487; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 488; GFX8-NEXT: v_ashrrev_i16_e32 v2, v1, v0 489; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 490; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 491; GFX8-NEXT: s_setpc_b64 s[30:31] 492; 493; GFX9-LABEL: v_ashr_v2i16: 494; GFX9: ; %bb.0: 495; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 496; GFX9-NEXT: v_pk_ashrrev_i16 v0, v1, v0 497; GFX9-NEXT: s_setpc_b64 s[30:31] 498 %result = ashr <2 x i16> %value, %amount 499 ret <2 x i16> %result 500} 501 502define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) { 503; GFX6-LABEL: v_ashr_v2i16_15: 504; GFX6: ; %bb.0: 505; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 506; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 507; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 508; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0 509; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1 510; GFX6-NEXT: s_setpc_b64 s[30:31] 511; 512; GFX8-LABEL: v_ashr_v2i16_15: 513; GFX8: ; %bb.0: 514; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 515; GFX8-NEXT: v_mov_b32_e32 v2, 15 516; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0 517; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 518; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 519; GFX8-NEXT: s_setpc_b64 s[30:31] 520; 521; GFX9-LABEL: v_ashr_v2i16_15: 522; GFX9: ; %bb.0: 523; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 524; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] 525; GFX9-NEXT: s_setpc_b64 s[30:31] 526 %result = ashr <2 x i16> %value, <i16 15, i16 15> 527 ret <2 x 
i16> %result 528} 529; NOTE(review): in s_ashr_v2i16 the GFX9 checks apply s_ashr_i32 to the packed 32-bit s0 and build the high element with s_lshr_b32, with no s_sext_i32_i16 as in the GFX6/GFX8 checks; this looks like it loses the per-element sign extension - confirm the GFX9 output is correct before treating these assertions as a reference. 530define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) { 531; GFX6-LABEL: s_ashr_v2i16: 532; GFX6: ; %bb.0: 533; GFX6-NEXT: s_mov_b32 s4, 0xffff 534; GFX6-NEXT: s_and_b32 s2, s2, s4 535; GFX6-NEXT: s_sext_i32_i16 s0, s0 536; GFX6-NEXT: s_ashr_i32 s0, s0, s2 537; GFX6-NEXT: s_and_b32 s2, s3, s4 538; GFX6-NEXT: s_sext_i32_i16 s1, s1 539; GFX6-NEXT: s_ashr_i32 s1, s1, s2 540; GFX6-NEXT: s_and_b32 s1, s1, s4 541; GFX6-NEXT: s_and_b32 s0, s0, s4 542; GFX6-NEXT: s_lshl_b32 s1, s1, 16 543; GFX6-NEXT: s_or_b32 s0, s0, s1 544; GFX6-NEXT: ; return to shader part epilog 545; 546; GFX8-LABEL: s_ashr_v2i16: 547; GFX8: ; %bb.0: 548; GFX8-NEXT: s_lshr_b32 s2, s0, 16 549; GFX8-NEXT: s_lshr_b32 s3, s1, 16 550; GFX8-NEXT: s_sext_i32_i16 s0, s0 551; GFX8-NEXT: s_sext_i32_i16 s1, s1 552; GFX8-NEXT: s_sext_i32_i16 s2, s2 553; GFX8-NEXT: s_sext_i32_i16 s3, s3 554; GFX8-NEXT: s_ashr_i32 s0, s0, s1 555; GFX8-NEXT: s_ashr_i32 s1, s2, s3 556; GFX8-NEXT: s_lshl_b32 s1, s1, 16 557; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 558; GFX8-NEXT: s_or_b32 s0, s1, s0 559; GFX8-NEXT: ; return to shader part epilog 560; 561; GFX9-LABEL: s_ashr_v2i16: 562; GFX9: ; %bb.0: 563; GFX9-NEXT: s_lshr_b32 s2, s0, 16 564; GFX9-NEXT: s_lshr_b32 s3, s1, 16 565; GFX9-NEXT: s_ashr_i32 s0, s0, s1 566; GFX9-NEXT: s_ashr_i32 s1, s2, s3 567; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 568; GFX9-NEXT: ; return to shader part epilog 569 %result = ashr <2 x i16> %value, %amount 570 %cast = bitcast <2 x i16> %result to i32 571 ret i32 %cast 572} 573 574define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) { 575; GFX6-LABEL: ashr_v2i16_sv: 576; GFX6: ; %bb.0: 577; GFX6-NEXT: s_mov_b32 s2, 0xffff 578; GFX6-NEXT: v_and_b32_e32 v0, s2, v0 579; GFX6-NEXT: s_sext_i32_i16 s0, s0 580; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 581; GFX6-NEXT: v_and_b32_e32 v1, s2, v1 582; GFX6-NEXT: s_sext_i32_i16 s0, s1 583; GFX6-NEXT: v_ashr_i32_e32 v1, s0, v1 584; GFX6-NEXT: 
v_and_b32_e32 v1, s2, v1 585; GFX6-NEXT: v_and_b32_e32 v0, s2, v0 586; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 587; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 588; GFX6-NEXT: ; return to shader part epilog 589; 590; GFX8-LABEL: ashr_v2i16_sv: 591; GFX8: ; %bb.0: 592; GFX8-NEXT: s_lshr_b32 s1, s0, 16 593; GFX8-NEXT: v_mov_b32_e32 v2, s1 594; GFX8-NEXT: v_ashrrev_i16_e64 v1, v0, s0 595; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 596; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 597; GFX8-NEXT: ; return to shader part epilog 598; 599; GFX9-LABEL: ashr_v2i16_sv: 600; GFX9: ; %bb.0: 601; GFX9-NEXT: v_pk_ashrrev_i16 v0, v0, s0 602; GFX9-NEXT: ; return to shader part epilog 603 %result = ashr <2 x i16> %value, %amount 604 %cast = bitcast <2 x i16> %result to float 605 ret float %cast 606} 607 608define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) { 609; GFX6-LABEL: ashr_v2i16_vs: 610; GFX6: ; %bb.0: 611; GFX6-NEXT: s_mov_b32 s2, 0xffff 612; GFX6-NEXT: s_and_b32 s0, s0, s2 613; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 614; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0 615; GFX6-NEXT: s_and_b32 s0, s1, s2 616; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 617; GFX6-NEXT: v_ashrrev_i32_e32 v1, s0, v1 618; GFX6-NEXT: v_and_b32_e32 v1, s2, v1 619; GFX6-NEXT: v_and_b32_e32 v0, s2, v0 620; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 621; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 622; GFX6-NEXT: ; return to shader part epilog 623; 624; GFX8-LABEL: ashr_v2i16_vs: 625; GFX8: ; %bb.0: 626; GFX8-NEXT: s_lshr_b32 s1, s0, 16 627; GFX8-NEXT: v_mov_b32_e32 v2, s1 628; GFX8-NEXT: v_ashrrev_i16_e32 v1, s0, v0 629; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 630; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 631; GFX8-NEXT: ; return to shader part epilog 632; 633; GFX9-LABEL: ashr_v2i16_vs: 634; GFX9: ; %bb.0: 635; GFX9-NEXT: v_pk_ashrrev_i16 v0, s0, v0 636; GFX9-NEXT: ; return to shader part 
epilog 637 %result = ashr <2 x i16> %value, %amount 638 %cast = bitcast <2 x i16> %result to float 639 ret float %cast 640} 641 642; FIXME: The <3 x i16> tests below are disabled (commented out) and have no CHECK lines; re-enable them and regenerate the assertions. 643; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) { 644; %result = ashr <3 x i16> %value, %amount 645; ret <3 x i16> %result 646; } 647 648; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) { 649; %result = ashr <3 x i16> %value, %amount 650; ret <3 x i16> %result 651; } 652 653define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) { 654; GFX6-LABEL: v_ashr_v4i16: 655; GFX6: ; %bb.0: 656; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 657; GFX6-NEXT: s_mov_b32 s4, 0xffff 658; GFX6-NEXT: v_and_b32_e32 v4, s4, v4 659; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 660; GFX6-NEXT: v_ashrrev_i32_e32 v0, v4, v0 661; GFX6-NEXT: v_and_b32_e32 v4, s4, v5 662; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 663; GFX6-NEXT: v_ashrrev_i32_e32 v1, v4, v1 664; GFX6-NEXT: v_and_b32_e32 v4, s4, v6 665; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 666; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 667; GFX6-NEXT: v_ashrrev_i32_e32 v2, v4, v2 668; GFX6-NEXT: v_and_b32_e32 v4, s4, v7 669; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 670; GFX6-NEXT: v_ashrrev_i32_e32 v3, v4, v3 671; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 672; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 673; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 674; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 675; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 676; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 677; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 678; GFX6-NEXT: s_setpc_b64 s[30:31] 679; 680; GFX8-LABEL: v_ashr_v4i16: 681; GFX8: ; %bb.0: 682; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 683; GFX8-NEXT: v_ashrrev_i16_e32 v4, v2, v0 684; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 685; GFX8-NEXT: v_ashrrev_i16_e32 v2, v3, v1 686; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:WORD_1 687; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 688; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 689; GFX8-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX9-LABEL: v_ashr_v4i16: 692; GFX9: ; %bb.0: 693; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 694; GFX9-NEXT: v_pk_ashrrev_i16 v0, v2, v0 695; GFX9-NEXT: v_pk_ashrrev_i16 v1, v3, v1 696; GFX9-NEXT: s_setpc_b64 s[30:31] 697 %result = ashr <4 x i16> %value, %amount 698 %cast = bitcast <4 x i16> %result to <2 x float> 699 ret <2 x float> %cast 700} 701 702define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) { 703; GFX6-LABEL: s_ashr_v4i16: 704; GFX6: ; %bb.0: 705; GFX6-NEXT: s_mov_b32 s8, 0xffff 706; GFX6-NEXT: s_and_b32 s4, s4, s8 707; GFX6-NEXT: s_sext_i32_i16 s0, s0 708; GFX6-NEXT: s_ashr_i32 s0, s0, s4 709; GFX6-NEXT: s_and_b32 s4, s5, s8 710; GFX6-NEXT: s_sext_i32_i16 s1, s1 711; GFX6-NEXT: s_ashr_i32 s1, s1, s4 712; GFX6-NEXT: s_and_b32 s4, s6, s8 713; GFX6-NEXT: s_sext_i32_i16 s2, s2 714; GFX6-NEXT: s_and_b32 s1, s1, s8 715; GFX6-NEXT: s_ashr_i32 s2, s2, s4 716; GFX6-NEXT: s_and_b32 s4, s7, s8 717; GFX6-NEXT: s_sext_i32_i16 s3, s3 718; GFX6-NEXT: s_ashr_i32 s3, s3, s4 719; GFX6-NEXT: s_and_b32 s0, s0, s8 720; GFX6-NEXT: s_lshl_b32 s1, s1, 16 721; GFX6-NEXT: s_or_b32 s0, s0, s1 722; GFX6-NEXT: s_and_b32 s1, s2, s8 723; GFX6-NEXT: s_and_b32 s2, s3, s8 724; GFX6-NEXT: s_lshl_b32 s2, s2, 16 725; GFX6-NEXT: s_or_b32 s1, s1, s2 726; GFX6-NEXT: ; return to shader part epilog 727; 728; GFX8-LABEL: s_ashr_v4i16: 729; GFX8: ; %bb.0: 730; GFX8-NEXT: s_lshr_b32 s4, s0, 16 731; GFX8-NEXT: s_lshr_b32 s6, s2, 16 732; GFX8-NEXT: s_lshr_b32 s5, s1, 16 733; GFX8-NEXT: s_lshr_b32 s7, s3, 16 734; GFX8-NEXT: s_sext_i32_i16 s0, s0 735; GFX8-NEXT: s_sext_i32_i16 s2, s2 736; GFX8-NEXT: s_sext_i32_i16 s4, s4 737; GFX8-NEXT: s_sext_i32_i16 s6, s6 738; GFX8-NEXT: s_ashr_i32 s0, s0, s2 739; GFX8-NEXT: s_ashr_i32 s2, s4, s6 740; GFX8-NEXT: s_mov_b32 s4, 0xffff 741; GFX8-NEXT: s_sext_i32_i16 s1, s1 742; 
GFX8-NEXT: s_sext_i32_i16 s3, s3 743; GFX8-NEXT: s_sext_i32_i16 s5, s5 744; GFX8-NEXT: s_sext_i32_i16 s7, s7 745; GFX8-NEXT: s_ashr_i32 s1, s1, s3 746; GFX8-NEXT: s_ashr_i32 s3, s5, s7 747; GFX8-NEXT: s_lshl_b32 s2, s2, 16 748; GFX8-NEXT: s_and_b32 s0, s0, s4 749; GFX8-NEXT: s_or_b32 s0, s2, s0 750; GFX8-NEXT: s_lshl_b32 s2, s3, 16 751; GFX8-NEXT: s_and_b32 s1, s1, s4 752; GFX8-NEXT: s_or_b32 s1, s2, s1 753; GFX8-NEXT: ; return to shader part epilog 754; 755; GFX9-LABEL: s_ashr_v4i16: 756; GFX9: ; %bb.0: 757; GFX9-NEXT: s_lshr_b32 s4, s0, 16 758; GFX9-NEXT: s_lshr_b32 s5, s2, 16 759; GFX9-NEXT: s_ashr_i32 s0, s0, s2 760; GFX9-NEXT: s_ashr_i32 s2, s4, s5 761; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 762; GFX9-NEXT: s_lshr_b32 s2, s1, 16 763; GFX9-NEXT: s_lshr_b32 s4, s3, 16 764; GFX9-NEXT: s_ashr_i32 s1, s1, s3 765; GFX9-NEXT: s_ashr_i32 s2, s2, s4 766; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 767; GFX9-NEXT: ; return to shader part epilog 768 %result = ashr <4 x i16> %value, %amount 769 %cast = bitcast <4 x i16> %result to <2 x i32> 770 ret <2 x i32> %cast 771} 772 773; FIXME: The <5 x i16> and <6 x i16> tests below are disabled (commented out) and have no CHECK lines; re-enable them and regenerate the assertions. 774; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) { 775; %result = ashr <5 x i16> %value, %amount 776; ret <5 x i16> %result 777; } 778 779; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) { 780; %result = ashr <5 x i16> %value, %amount 781; ret <5 x i16> %result 782; } 783 784; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) { 785; %result = ashr <6 x i16> %value, %amount 786; %cast = bitcast <6 x i16> %result to <3 x float> 787; ret <3 x float> %cast 788; } 789 790; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) { 791; %result = ashr <6 x i16> %value, %amount 792; %cast = bitcast <6 x i16> %result to <3 x i32> 793; ret <3 x i32> %cast 794; } 795 796define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) { 797; GFX6-LABEL: v_ashr_v8i16: 798; GFX6: ; 
%bb.0: 799; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 800; GFX6-NEXT: s_mov_b32 s4, 0xffff 801; GFX6-NEXT: v_and_b32_e32 v8, s4, v8 802; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 803; GFX6-NEXT: v_ashrrev_i32_e32 v0, v8, v0 804; GFX6-NEXT: v_and_b32_e32 v8, s4, v9 805; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 806; GFX6-NEXT: v_ashrrev_i32_e32 v1, v8, v1 807; GFX6-NEXT: v_and_b32_e32 v8, s4, v10 808; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 809; GFX6-NEXT: v_ashrrev_i32_e32 v2, v8, v2 810; GFX6-NEXT: v_and_b32_e32 v8, s4, v11 811; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 812; GFX6-NEXT: v_mov_b32_e32 v16, 0xffff 813; GFX6-NEXT: v_ashrrev_i32_e32 v3, v8, v3 814; GFX6-NEXT: v_and_b32_e32 v8, s4, v12 815; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 816; GFX6-NEXT: v_and_b32_e32 v1, v1, v16 817; GFX6-NEXT: v_ashrrev_i32_e32 v4, v8, v4 818; GFX6-NEXT: v_and_b32_e32 v8, s4, v13 819; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 820; GFX6-NEXT: v_ashrrev_i32_e32 v5, v8, v5 821; GFX6-NEXT: v_and_b32_e32 v8, s4, v14 822; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 823; GFX6-NEXT: v_and_b32_e32 v0, v0, v16 824; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 825; GFX6-NEXT: v_ashrrev_i32_e32 v6, v8, v6 826; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 827; GFX6-NEXT: v_and_b32_e32 v1, v2, v16 828; GFX6-NEXT: v_and_b32_e32 v2, v3, v16 829; GFX6-NEXT: v_and_b32_e32 v8, v15, v16 830; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 831; GFX6-NEXT: v_and_b32_e32 v3, v5, v16 832; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 833; GFX6-NEXT: v_ashrrev_i32_e32 v7, v8, v7 834; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 835; GFX6-NEXT: v_and_b32_e32 v2, v4, v16 836; GFX6-NEXT: v_and_b32_e32 v4, v7, v16 837; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 838; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 839; GFX6-NEXT: v_and_b32_e32 v3, v6, v16 840; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 841; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 842; GFX6-NEXT: s_setpc_b64 s[30:31] 843; 844; GFX8-LABEL: v_ashr_v8i16: 845; GFX8: ; %bb.0: 846; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 847; GFX8-NEXT: 
v_ashrrev_i16_e32 v8, v4, v0
; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_ashrrev_i16_e32 v4, v5, v1
; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
; GFX8-NEXT: v_ashrrev_i16_e32 v4, v6, v2
; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: v_ashrrev_i16_e32 v4, v7, v3
; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v8i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_ashrrev_i16 v0, v4, v0
; GFX9-NEXT: v_pk_ashrrev_i16 v1, v5, v1
; GFX9-NEXT: v_pk_ashrrev_i16 v2, v6, v2
; GFX9-NEXT: v_pk_ashrrev_i16 v3, v7, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %result = ashr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x float>
  ret <4 x float> %cast
}

; Scalar (inreg, amdgpu_ps) arithmetic shift right of <8 x i16>; the result is
; returned as <4 x i32> through a bitcast.
; The tahiti and fiji expectations sign-extend the value elements with
; s_sext_i32_i16 before s_ashr_i32 and repack the halves with and/shl/or.
; NOTE(review): the gfx900 expectations contain no s_sext_i32_i16 ahead of
; s_ashr_i32 - confirm that this lowering is intended before regenerating.
define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
; GFX6-LABEL: s_ashr_v8i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s16, 0xffff
; GFX6-NEXT: s_and_b32 s8, s8, s16
; GFX6-NEXT: s_sext_i32_i16 s0, s0
; GFX6-NEXT: s_ashr_i32 s0, s0, s8
; GFX6-NEXT: s_and_b32 s8, s9, s16
; GFX6-NEXT: s_sext_i32_i16 s1, s1
; GFX6-NEXT: s_ashr_i32 s1, s1, s8
; GFX6-NEXT: s_and_b32 s8, s10, s16
; GFX6-NEXT: s_sext_i32_i16 s2, s2
; GFX6-NEXT: s_ashr_i32 s2, s2, s8
; GFX6-NEXT: s_and_b32 s8, s11, s16
; GFX6-NEXT: s_sext_i32_i16 s3, s3
; GFX6-NEXT: s_ashr_i32 s3, s3, s8
; GFX6-NEXT: s_and_b32 s8, s12, s16
; GFX6-NEXT: s_sext_i32_i16 s4, s4
; GFX6-NEXT: s_and_b32 s1, s1, s16
; GFX6-NEXT: s_ashr_i32 s4, s4, s8
; GFX6-NEXT: s_and_b32 s8, s13, s16
; GFX6-NEXT: s_sext_i32_i16 s5, s5
; GFX6-NEXT: s_ashr_i32 s5, s5, s8
; GFX6-NEXT: s_and_b32 s8, s14, s16
; GFX6-NEXT: s_sext_i32_i16 s6, s6
; GFX6-NEXT: s_and_b32 s0, s0, s16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_ashr_i32 s6, s6, s8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s16
; GFX6-NEXT: s_and_b32 s2, s3, s16
; GFX6-NEXT: s_and_b32 s8, s15, s16
; GFX6-NEXT: s_sext_i32_i16 s7, s7
; GFX6-NEXT: s_and_b32 s3, s5, s16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_ashr_i32 s7, s7, s8
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s16
; GFX6-NEXT: s_and_b32 s4, s7, s16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ashr_v8i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
; GFX8-NEXT: s_ashr_i32 s4, s8, s12
; GFX8-NEXT: s_mov_b32 s8, 0xffff
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s13, s13
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s14, s14
; GFX8-NEXT: s_ashr_i32 s5, s9, s13
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_and_b32 s0, s0, s8
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
; GFX8-NEXT: s_or_b32 s0, s4, s0
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s11, s11
; GFX8-NEXT: s_sext_i32_i16 s15, s15
; GFX8-NEXT: s_ashr_i32 s6, s10, s14
; GFX8-NEXT: s_lshl_b32 s4, s5, 16
; GFX8-NEXT: s_and_b32 s1, s1, s8
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
; GFX8-NEXT: s_or_b32 s1, s4, s1
; GFX8-NEXT: s_ashr_i32 s7, s11, s15
; GFX8-NEXT: s_lshl_b32 s4, s6, 16
; GFX8-NEXT: s_and_b32 s2, s2, s8
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: s_lshl_b32 s4, s7, 16
; GFX8-NEXT: s_and_b32 s3, s3, s8
; GFX8-NEXT: s_or_b32 s3, s4, s3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_ashr_v8i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshr_b32 s8, s0, 16
; GFX9-NEXT: s_lshr_b32 s9, s4, 16
; GFX9-NEXT: s_ashr_i32 s0, s0, s4
; GFX9-NEXT: s_ashr_i32 s4, s8, s9
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
; GFX9-NEXT: s_lshr_b32 s4, s1, 16
; GFX9-NEXT: s_lshr_b32 s8, s5, 16
; GFX9-NEXT: s_ashr_i32 s1, s1, s5
; GFX9-NEXT: s_ashr_i32 s4, s4, s8
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
; GFX9-NEXT: s_lshr_b32 s4, s2, 16
; GFX9-NEXT: s_lshr_b32 s5, s6, 16
; GFX9-NEXT: s_ashr_i32 s4, s4, s5
; GFX9-NEXT: s_ashr_i32 s2, s2, s6
; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
; GFX9-NEXT: s_lshr_b32 s4, s3, 16
; GFX9-NEXT: s_lshr_b32 s5, s7, 16
; GFX9-NEXT: s_ashr_i32 s3, s3, s7
; GFX9-NEXT: s_ashr_i32 s4, s4, s5
; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4
; GFX9-NEXT: ; return to shader part epilog
  %shr = ashr <8 x i16> %value, %amount
  %bits = bitcast <8 x i16> %shr to <4 x i32>
  ret <4 x i32> %bits
}

; VGPR i64 shifted right arithmetically by a variable amount. Each subtarget
; is expected to use one 64-bit shift (v_ashr_i64 on tahiti, v_ashrrev_i64 on
; fiji and gfx900) that reads the amount from v2.
define i64 @v_ashr_i64(i64 %value, i64 %amount) {
; GFX6-LABEL: v_ashr_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr i64 %value, %amount
  ret i64 %shr
}

; VGPR i64 shifted by the constant 63. The expected code shifts the high
; dword right by 31 and copies that value into both result registers.
define i64 @v_ashr_i64_63(i64 %value) {
; GCN-LABEL: v_ashr_i64_63:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr i64 %value, 63
  ret i64 %shr
}

; VGPR i64 shifted by the constant 33. The expected code builds the low dword
; as (high dword >> 1) and the high dword as (high dword >> 31).
define i64 @v_ashr_i64_33(i64 %value) {
; GCN-LABEL: v_ashr_i64_33:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GCN-NEXT: v_ashrrev_i32_e32 v0, 1, v1
; GCN-NEXT: v_mov_b32_e32 v1, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr i64 %value, 33
  ret i64 %shr
}

; VGPR i64 shifted by the constant 32. The expected code copies the high dword
; into the low result and fills the high result with (that dword >> 31).
define i64 @v_ashr_i64_32(i64 %value) {
; GCN-LABEL: v_ashr_i64_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr i64 %value, 32
  ret i64 %shr
}

; VGPR i64 shifted by the constant 31. The expected code keeps a full 64-bit
; shift instruction with 31 as an immediate operand.
define i64 @v_ashr_i64_31(i64 %value) {
; GFX6-LABEL: v_ashr_i64_31:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64_31:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64_31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr i64 %value, 31
  ret i64 %shr
}

; SGPR (inreg) i64 shifted by a variable amount: one s_ashr_i64 reading the
; amount from s2 is expected on every subtarget.
define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
; GCN-LABEL: s_ashr_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, %amount
  ret i64 %shr
}

; SGPR i64 shifted by the constant 63: (high dword >> 31) is expected in both
; result registers.
define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_63:
; GCN: ; %bb.0:
; GCN-NEXT: s_ashr_i32 s0, s1, 31
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, 63
  ret i64 %shr
}

; SGPR i64 shifted by the constant 33: low result is (high dword >> 1), high
; result is (high dword >> 31).
define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_33:
; GCN: ; %bb.0:
; GCN-NEXT: s_ashr_i32 s2, s1, 31
; GCN-NEXT: s_ashr_i32 s0, s1, 1
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, 33
  ret i64 %shr
}

; SGPR i64 shifted by the constant 32: low result is a copy of the high dword,
; high result is (high dword >> 31).
define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s0, s1
; GCN-NEXT: s_ashr_i32 s1, s1, 31
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, 32
  ret i64 %shr
}

; SGPR i64 shifted by the constant 31: a single s_ashr_i64 with an immediate
; amount is expected.
define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_31:
; GCN: ; %bb.0:
; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], 31
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, 31
  ret i64 %shr
}

; Mixed operands: the value is inreg (s[0:1]) and the amount is in a VGPR (v0).
; The result is bitcast to <2 x float> for the return.
define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
; GFX6-LABEL: ashr_i64_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashr_i64 v[0:1], s[0:1], v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_sv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX9-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, %amount
  %bits = bitcast i64 %shr to <2 x float>
  ret <2 x float> %bits
}

; Mixed operands: the value is in VGPRs (v[0:1]) and the amount is inreg (s0).
; The result is bitcast to <2 x float> for the return.
define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
; GFX6-LABEL: ashr_i64_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], s0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_vs:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX9-NEXT: ; return to shader part epilog
  %shr = ashr i64 %value, %amount
  ret_placeholder
}

; VGPR <2 x i64> shifted by a variable vector amount: one 64-bit shift per
; element, reading the amounts from v4 and v6.
define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_ashr_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], v4
; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], v6
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX8-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX9-NEXT: v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr <2 x i64> %value, %amount
  ret <2 x i64> %shr
}

; VGPR <2 x i64> shifted by the splat constant 31: one 64-bit shift with an
; immediate amount is expected per element.
define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_ashr_v2i64_31:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 31
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64_31:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64_31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT: v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %shr = ashr <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %shr
}

; SGPR (inreg) <2 x i64> shifted by a variable vector amount: one s_ashr_i64
; per element, reading the amounts from s4 and s6.
define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
; GCN-LABEL: s_ashr_v2i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_ashr_i64 s[0:1], s[0:1], s4
; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s6
; GCN-NEXT: ; return to shader part epilog
  %shr = ashr <2 x i64> %value, %amount
  ret <2 x i64> %shr
}