; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Each function below computes a minimum as an icmp followed by a select on
; the same two operands; the check lines state which min instruction (or
; fallback sequence) is expected in the llc output for that function.
;
; All label checks use the anchored form ({{^}}name followed by a colon) so
; that a function name which is a prefix of another cannot match the wrong
; label line.

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Signed "a <= b ? a : b" on two i32 values loaded from memory at an index
; taken from llvm.r600.read.tidig.x.
; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; Same signed sle pattern, but the operands are function arguments.
; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; SI: s_min_i32
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Single-element vector variant; one s_min_i32 is expected.
; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; SI: s_min_i32
define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %cmp = icmp sle <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; Four-element vector variant; one s_min_i32 per element is expected.
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %cmp = icmp sle <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}

; i8 operands: the checks expect both arguments to be loaded as dwords and
; sign-extended from 8 bits before the 32-bit signed min.
; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; SI: s_load_dword
; SI: s_load_dword
; SI: s_sext_i32_i8
; SI: s_sext_i32_i8
; SI: s_min_i32
define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
  %cmp = icmp sle i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; XXX - should be able to use s_min if we stop unnecessarily doing
; extloads with mubuf instructions.

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte

; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32

; SI: s_endpgm
define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind {
  %cmp = icmp sle <4 x i8> %a, %b
  %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i16> operands; four v_min_i32 are expected.
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
  %cmp = icmp sle <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %out
  ret void
}

; Signed "a < b ? a : b" (slt) on two loaded i32 values.
; FUNC-LABEL: {{^}}v_test_imin_slt_i32:
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; Signed slt pattern on function arguments.
; FUNC-LABEL: {{^}}s_test_imin_slt_i32:
; SI: s_min_i32
define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Two-element vector slt variant; one s_min_i32 per element is expected.
; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; SI: s_min_i32
; SI: s_min_i32
define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %cmp = icmp slt <2 x i32> %a, %b
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}

; slt against the constant 8; the check requires 8 to appear as the last
; operand of the s_min_i32.
; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp slt i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; sle against the constant 8; same expected instruction as the slt case.
; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sle i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned "a <= b ? a : b" (ule) on two loaded i32 values.
; FUNC-LABEL: {{^}}v_test_umin_ule_i32:
; SI: v_min_u32_e32
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}
; Unsigned ule min on <3 x i32> values loaded from memory. Exactly three
; v_min_u32_e32 are expected before the end of the program.
; FUNC-LABEL: {{^}}v_test_umin_ule_v3i32:
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32
; SI: s_endpgm
define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep
  ret void
}

; Unsigned ule pattern on function arguments.
; FUNC-LABEL: {{^}}s_test_umin_ule_i32:
; SI: s_min_u32
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned "a < b ? a : b" (ult) on two loaded i32 values.
; The label is anchored and colon-terminated because this function's name is
; a prefix of v_test_umin_ult_i32_multi_use further down.
; FUNC-LABEL: {{^}}v_test_umin_ult_i32:
; SI: v_min_u32_e32
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; Unsigned ult min on i8 values loaded from memory; the checks expect two
; unsigned byte loads feeding a 32-bit unsigned min.
; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_min_u32_e32
define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %a = load i8, i8 addrspace(1)* %gep0, align 1
  %b = load i8, i8 addrspace(1)* %gep1, align 1
  %cmp = icmp ult i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %outgep, align 1
  ret void
}

; Unsigned ult pattern on function arguments.
; FUNC-LABEL: {{^}}s_test_umin_ult_i32:
; SI: s_min_u32
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; The compare result is stored as well as being used by the select, so the
; icmp has two uses. The checks require a compare followed directly by a
; conditional move, and no v_min anywhere in the function.
; FUNC-LABEL: {{^}}v_test_umin_ult_i32_multi_use:
; SI-NOT: v_min
; SI: v_cmp_lt_u32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; SI: s_endpgm
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
  %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep0, align 4
  store i1 %cmp, i1 addrspace(1)* %outgep1
  ret void
}


; Single-element vector ult variant; one s_min_u32 is expected.
; FUNC-LABEL: {{^}}s_test_umin_ult_v1i32:
; SI: s_min_u32
define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; Eight-element vector ult variant; one s_min_u32 per element is expected.
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
  %cmp = icmp ult <8 x i32> %a, %b
  %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}

; <8 x i16> operands; eight v_min_u32 are expected.
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
; SI: v_min_u32
define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
  %cmp = icmp ult <8 x i16> %a, %b
  %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %val, <8 x i16> addrspace(1)* %out
  ret void
}

; Make sure the redundant 'and' is removed: both inputs are zero-extended
; from i16, so masking their minimum with 65535 changes nothing and the min
; must feed the store directly.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; SI-NEXT: buffer_store_dword [[VMIN]]
define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ult i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg is removed.

; Both inputs are sign-extended from i16 before the signed min, so the
; shl/ashr-by-16 pair applied to the result re-extends an already extended
; value. The checks require the min to feed the store with nothing in between.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; SI-NEXT: buffer_store_dword [[VMIN]]
define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
  %lhs = sext i16 %a to i32
  %rhs = sext i16 %b to i32
  %lhs.lt.rhs = icmp slt i32 %lhs, %rhs
  %min = select i1 %lhs.lt.rhs, i32 %lhs, i32 %rhs
  %min.hi = shl i32 %min, 16
  %min.sext = ashr i32 %min.hi, 16
  store i32 %min.sext, i32 addrspace(1)* %out
  ret void
}

; Signed sle min on i16 function arguments; a 32-bit s_min_i32 is expected.
; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; SI: s_min_i32
define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
  %a.le.b = icmp sle i16 %a, %b
  %min = select i1 %a.le.b, i16 %a, i16 %b
  store i16 %min, i16 addrspace(1)* %out
  ret void
}