; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s

; Code generation test for comparison (setcc) lowering. Every kernel below
; performs one kind of icmp/fcmp, usually sign-extends the i1 result and stores
; it, and the check lines pin the compare instructions expected from each of
; the two llc invocations above.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; Lane-wise equality compare of two <2 x i32> kernel arguments.
; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %result = icmp eq <2 x i32> %a, %b
  %sext = sext <2 x i1> %result to <2 x i32>
  store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
  ret void
}

; Lane-wise equality compare of two <4 x i32> values loaded from consecutive
; vector slots of %in.
; FUNC-LABEL: {{^}}setcc_v4i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = icmp eq <4 x i32> %a, %b
  %sext = sext <4 x i1> %result to <4 x i32>
  store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; Float comparisons
;;
;; One kernel per fcmp predicate. Each compares two float kernel arguments,
;; sign-extends the i1 result to i32 and stores it to %out.
;;;==========================================================================;;;

; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; GCN: v_cmp_eq_f32
define amdgpu_kernel void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp oeq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; GCN: v_cmp_gt_f32
define amdgpu_kernel void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ogt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; GCN: v_cmp_ge_f32
define amdgpu_kernel void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp oge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; GCN: v_cmp_lt_f32
define amdgpu_kernel void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp olt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; GCN: v_cmp_le_f32
define amdgpu_kernel void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ole float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_one:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT

; GCN: v_cmp_lg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_one(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp one float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ord:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_o_f32
define amdgpu_kernel void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ord float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ueq:
; R600-DAG: SETNE_DX10
; R600-DAG: SETNE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT

; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ueq float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ugt float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp uge float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10

; GCN: v_cmp_nge_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ult float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_ngt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp ule float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; GCN: v_cmp_neq_f32
define amdgpu_kernel void @f32_une(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp une float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}f32_uno:
; R600: SETNE_DX10
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
; GCN: v_cmp_u_f32
define amdgpu_kernel void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %0 = fcmp uno float %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; 32-bit integer comparisons
;;
;; One kernel per icmp predicate. Each compares two i32 kernel arguments,
;; sign-extends the i1 result to i32 and stores it to %out.
;;;==========================================================================;;;

; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; GCN: v_cmp_eq_u32
define amdgpu_kernel void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp eq i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; GCN: v_cmp_ne_u32
define amdgpu_kernel void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ne i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; GCN: v_cmp_gt_u32
define amdgpu_kernel void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ugt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; GCN: v_cmp_ge_u32
define amdgpu_kernel void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp uge i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; GCN: v_cmp_lt_u32
define amdgpu_kernel void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ult i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; GCN: v_cmp_le_u32
define amdgpu_kernel void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp ule i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sgt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; GCN: v_cmp_ge_i32
define amdgpu_kernel void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sge i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; GCN: v_cmp_lt_i32
define amdgpu_kernel void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp slt i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; GCN: v_cmp_le_i32
define amdgpu_kernel void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %0 = icmp sle i32 %a, %b
  %1 = sext i1 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Lane-wise equality compare of <3 x i32> values addressed by the workitem id.
; FIXME: This does 4 compares
; FUNC-LABEL: {{^}}v3i32_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i32> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
  ret void
}

; Same shape as v3i32_eq, but with <3 x i8> elements.
; FUNC-LABEL: {{^}}v3i8_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
  %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
  %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i8> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i8>
  store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
  ret void
}

; Make sure we don't try to emit i1 setcc ops
; NOTE(review): this label and the next one are deliberately left unanchored.
; Presumably the hyphenated symbol is not printed as a bare `name:` in the
; assembly output - confirm against real llc output before tightening.
; FUNC-LABEL: setcc-i1
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
; GCN: s_cmp_eq_u32 [[AND]], 0
define amdgpu_kernel void @setcc-i1(i32 %in) #0 {
  %and = and i32 %in, 1
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %endif, label %if
if:
  unreachable
endif:
  ret void
}

; Branch on the negation of (%cond >= 0.0 and %cond <= 1.0). The checks expect
; two negated float compares whose results are OR'ed together; the destination
; register pair of the OR is incidental and is therefore matched by pattern.
; FUNC-LABEL: setcc-i1-and-xor
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_or_b64 s[{{[0-9]+:[0-9]+}}], [[A]], [[B]]
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
  %tmp5 = fcmp oge float %cond, 0.000000e+00
  %tmp7 = fcmp ole float %cond, 1.000000e+00
  %tmp9 = and i1 %tmp5, %tmp7
  %tmp11 = xor i1 %tmp9, 1
  br i1 %tmp11, label %bb2, label %bb1

bb1:
  store i32 0, i32 addrspace(1)* %out
  br label %bb2

bb2:
  ret void
}

; <2 x i32> signed-greater-than compares whose results feed a shift/xor and a
; vector select rather than a plain sign extension. One scalar compare is
; expected per lane of the first icmp.
; FUNC-LABEL: {{^}}setcc_v2i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v2i32_expand(
    <2 x i32> addrspace(1)* %a,
    <2 x i32> addrspace(1)* %b,
    <2 x i32> addrspace(1)* %c,
    <2 x float> addrspace(1)* %r) {
entry:
  %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
  %b.val = load <2 x i32>, <2 x i32> addrspace(1)* %b
  %c.val = load <2 x i32>, <2 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <2 x i32> %a.val, <i32 1, i32 1>
  %zext.val.1 = zext <2 x i1> %icmp.val.1 to <2 x i32>
  %shl.val.1 = shl nuw <2 x i32> %zext.val.1, <i32 31, i32 31>
  %xor.val.1 = xor <2 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <2 x i32> %xor.val.1 to <2 x float>
  %icmp.val.2 = icmp sgt <2 x i32> %c.val, <i32 1199570944, i32 1199570944>
  %select.val.1 = select <2 x i1> %icmp.val.2, <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> %bitcast.val.1

  store <2 x float> %select.val.1, <2 x float> addrspace(1)* %r
  ret void
}

; <4 x i32> variant of setcc_v2i32_expand; four scalar compares are expected.
; FUNC-LABEL: {{^}}setcc_v4i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v4i32_expand(
    <4 x i32> addrspace(1)* %a,
    <4 x i32> addrspace(1)* %b,
    <4 x i32> addrspace(1)* %c,
    <4 x float> addrspace(1)* %r) {
entry:
  %a.val = load <4 x i32>, <4 x i32> addrspace(1)* %a
  %b.val = load <4 x i32>, <4 x i32> addrspace(1)* %b
  %c.val = load <4 x i32>, <4 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <4 x i32> %a.val, <i32 1, i32 1, i32 1, i32 1>
  %zext.val.1 = zext <4 x i1> %icmp.val.1 to <4 x i32>
  %shl.val.1 = shl nuw <4 x i32> %zext.val.1, <i32 31, i32 31, i32 31, i32 31>
  %xor.val.1 = xor <4 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <4 x i32> %xor.val.1 to <4 x float>
  %icmp.val.2 = icmp sgt <4 x i32> %c.val, <i32 1199570944, i32 1199570944, i32 1199570944, i32 1199570944>
  %select.val.1 = select <4 x i1> %icmp.val.2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %bitcast.val.1

  store <4 x float> %select.val.1, <4 x float> addrspace(1)* %r
  ret void
}

attributes #0 = { nounwind }