; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure to test with f32 and i32 compares. If we have to use float
; compares, we always have multiple condition registers. If we can do
; scalar compares, we don't want to use multiple condition registers.
;
; Every opt_select_* kernel below compares %a against %b and against %c,
; combines the two i1 results with `and` or `or`, and uses the combined
; condition to select between %x and %y before storing to %out.
;
; The check lines for those kernels all expect the same shape:
;   * one compare in its e32 form writing vcc,
;   * the other compare in its e64 form writing an SGPR pair,
;   * a single s_and_b64 / s_or_b64 merging both into vcc,
;   * v_cndmask_b32 on vcc (one for i32 selects, two for i64 selects).

; i32 select, i32 compares, combined with `and`.
; The NOT check ensures the selected VGPR is not mentioned again between the
; v_cndmask that defines it and the store that consumes it.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i32 select, f32 compares (`fcmp one`, checked as v_cmp_lg_f32), combined
; with `and`.
; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i64 select, i32 compares, combined with `and`.
; The 64-bit select is checked as two v_cndmask_b32 on the same vcc; the
; register captured first (RESULT1) is the upper bound of the stored pair.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %and = and i1 %icmp0, %icmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i64 select, f32 compares, combined with `and`.
; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %and = and i1 %fcmp0, %fcmp1
  %select = select i1 %and, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i32 select, i32 compares, combined with `or`.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i32 select, f32 compares, combined with `or`.
; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i32 %x, i32 %y
  store i32 %select, i32 addrspace(1)* %out
  ret void
}

; i64 select, i32 compares, combined with `or`.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
; GCN-DAG: v_cmp_ne_u32_e32 vcc,
; GCN-DAG: v_cmp_ne_u32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
  %icmp0 = icmp ne i32 %a, %b
  %icmp1 = icmp ne i32 %a, %c
  %or = or i1 %icmp0, %icmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; i64 select, f32 compares, combined with `or`.
; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
; GCN-DAG: v_cmp_lg_f32_e32 vcc,
; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
  %fcmp0 = fcmp one float %a, %b
  %fcmp1 = fcmp one float %a, %c
  %or = or i1 %fcmp0, %fcmp1
  %select = select i1 %or, i64 %x, i64 %y
  store i64 %select, i64 addrspace(1)* %out
  ret void
}

; Control-flow case: two nested `fcmp oeq` branches feed an i1 phi (with
; constant true/false incoming values and an xor-negated compare) that drives
; a float select. The checks pin three e64 compares, each writing an SGPR
; pair: neq against 1.0, neq against 0, and eq against 0.
; NOTE(review): the kernel name implies this was reduced from an earlier
; failure; the original failure is not recorded in this file.
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0

define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry:
  %cmp0 = fcmp oeq float %c0, 1.0
  br i1 %cmp0, label %if0, label %endif

if0:
  %cmp1 = fcmp oeq float %c1, 0.0
  br i1 %cmp1, label %if1, label %endif

if1:
  %cmp2 = xor i1 %cmp1, true
  br label %endif

endif:
  %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
  %tmp2 = select i1 %tmp0, float 4.0, float 0.0
  store float %tmp2, float addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }