; RUN: llc -march=amdgcn -mcpu=pitcairn < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; Integer max selection tests. Every kernel below computes
;   select (icmp <pred> a, b), a, b
; with a signed or unsigned greater-than / greater-or-equal predicate and the
; checks expect the pair to be emitted as max instructions: v_max / s_max on
; the amdgcn run, MAX_INT / MAX_UINT on the r600 run.
;
; Naming used by the kernels: a "v_" prefix means at least one operand is
; loaded from memory at a workitem-dependent address or from a global pointer,
; an "s_" prefix means the operands are kernel arguments.

; Signed max, sge predicate, one operand indexed by the workitem id.
; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Four-element vector variant: one max per element is expected.
; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32

; These could be merged into one
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
  %cmp = icmp sge <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Signed max, sge predicate, both operands are kernel arguments.
; FUNC-LABEL: {{^}}s_test_imax_sge_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed max against the constant 9; the constant is expected as an operand
; of the max instruction.
; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sge i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 signed max: both operands are expected to be loaded with sign-extending
; byte loads and compared with a 32-bit signed max.
; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp sge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

; Same as s_test_imax_sge_imm_i32 but with the strict sgt predicate.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sgt i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Two-element vector compared against a splat of 9: one max per element.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Signed max, sgt predicate, one operand indexed by the workitem id.
; FUNC-LABEL: {{^}}v_test_imax_sgt_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed max, sgt predicate, both operands are kernel arguments.
; FUNC-LABEL: {{^}}s_test_imax_sgt_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max, uge predicate, one operand indexed by the workitem id.
; FUNC-LABEL: {{^}}v_test_umax_uge_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max, uge predicate, both operands are kernel arguments.
; FUNC-LABEL: {{^}}s_test_umax_uge_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Three-element vector: exactly three max instructions are expected, the
; negative checks reject a fourth one.
; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
; SI: s_max_u32
; SI: s_max_u32
; SI: s_max_u32
; SI-NOT: s_max_u32
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
; EG: MAX_UINT
; EG-NOT: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
  %cmp = icmp uge <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; i8 unsigned max: both operands are expected to be loaded with zero-extending
; byte loads and compared with a 32-bit unsigned max.
; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp uge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

; Unsigned max, ugt predicate.
; NOTE(review): unlike the other v_ tests, %aptr is unused here; %a is loaded
; from the indexed %bptr address and %b from %bptr itself. Confirm this is
; intentional before changing it.
; FUNC-LABEL: {{^}}v_test_umax_ugt_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep.in, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max, ugt predicate, both operands are kernel arguments.
; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Two-element vector compared against distinct constants (15 and 23); the
; two max instructions may be emitted in either order.
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23

; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Make sure the redundant 'and' with 65535 is removed: both inputs are
; zero-extended from i16, so the max result is stored directly.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_UINT
define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) nounwind {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ugt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg (the shl/ashr by 16 pair) is
; removed: the max result is stored directly.
; NOTE(review): the kernel name says "slt" but the predicate used is sgt.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; SI-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; SI: s_max_i32 [[MAX:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_INT
define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) nounwind {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp sgt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; i16 signed max on kernel arguments: both operands are expected to be
; sign-extended to 32 bits before a 32-bit signed max.
; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
; SI: s_load_dword
; SI: s_load_dword
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i16(i16 addrspace(1)* %out, [8 x i32], i16 %a, [8 x i32], i16 %b) nounwind {
  %cmp = icmp sge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; 64 bit
; For the i64 kernels the amdgcn run only checks that the end of the program
; is reached; the r600 run checks for two 32-bit max instructions.
; FUNC-LABEL: {{^}}test_umax_ugt_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp ugt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umax_uge_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp uge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; Signed i64: one unsigned and one signed 32-bit max are expected on r600,
; in either order.
; FUNC-LABEL: {{^}}test_imax_sgt_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sgt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imax_sge_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}


declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
; NOTE(review): attribute group #1 is not referenced by anything visible in
; this file; the kernels spell out nounwind inline.
attributes #1 = { nounwind }