; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone


; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

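; Note: the shl/ashr pairs in these tests are the canonical sign_extend_inreg
; pattern: shifting left by (bit width - N) and then arithmetic-shifting right
; by the same amount sign-extends the low N bits in place. For the i64 cases
; below, the operation is expected to split into a 32-bit sign-extension of
; the low half, with the high 32 bits becoming copies of the sign bit.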
; FUNC-LABEL: @sext_in_reg_i8_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}} does not work,
;; and the _LO/_HI order might differ.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i16_to_i64
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}} does not work,
;; and the _LO/_HI order might differ.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_i32_to_i64
; SI: S_LOAD_DWORD
; SI: S_LOAD_DWORD
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]
; EG: ADD_INT
; EG: LSHR
; EG: LSHR
;; TODO: Check the address computation; using | with variables inside {{}} does not work,
;; and the _LO/_HI order might differ.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
; SI-NOT: BFE
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}


; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

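; As with the i1 vectors above, the v2i8/v4i8/v2i16 cases below should emit
; one sign-extension per element.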
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @testcase
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @testcase_3
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

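; When the operands come from loads rather than kernel arguments, the values
; live in VGPRs, so the VALU V_BFE_I32 form should be selected instead of the
; scalar S_SEXT/S_BFE instructions used above.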
; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.

; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
; SI: V_MAX_I32
; SI: V_BFE_I32
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

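; The remaining tests exercise @llvm.AMDGPU.bfe.i32(%src, %offset, %width), a
; signed bit-field extract: it returns %width bits of %src starting at bit
; %offset, sign-extended to 32 bits.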
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: @bfe_0_width
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_8_bfe_8
; SI: V_BFE_I32
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_8_bfe_16
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: S_ENDPGM
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single BFE.
; FUNC-LABEL: @bfe_16_bfe_8
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE.
; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sextload_i8_to_i32_bfe
; SI: BUFFER_LOAD_SBYTE
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
; SI-NOT: BFE
; SI: S_ENDPGM
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

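; In the tests below, the shl/ashr sign-extension feeding the BFE should fold
; away where the combine is legal, leaving only the V_BFE_I32 with the
; appropriate offset and width (the i2 case still emits the shifts).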
; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
; SI-NOT: SHR
; SI-NOT: SHL
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
; SI: BUFFER_LOAD_DWORD
; SI-NOT: SHL
; SI-NOT: SHR
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: S_ENDPGM
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: S_ENDPGM
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}