; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s

; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; Instruction-selection checks for the "select (fcmp a, b), a, b" maximum idiom.
; Each kernel loads two floats, compares them with one fcmp predicate and
; selects between the same two values. The run lines above cover the default
; amdgcn subtarget and fiji, each with and without the no-NaNs /
; no-signed-zeros options, plus r600 redwood.

declare i32 @llvm.amdgcn.workitem.id.x() #1

; Unordered >= compare: the SI safe-math checks expect the legacy max with the
; operands swapped (B, A); the fiji safe-math checks expect compare + cndmask.
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp uge float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same unordered >= pattern, but both compared values come from "fadd nnan"
; instructions. The safe-math checks below still expect the legacy max on SI
; and compare + cndmask on fiji.
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]

; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4
  %a.nnan = fadd nnan float %a, 1.0
  %b.nnan = fadd nnan float %b, 2.0

  %cmp = fcmp uge float %a.nnan, %b.nnan
  %val = select i1 %cmp, float %a.nnan, float %b.nnan
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Ordered >= compare: the SI safe-math checks expect the legacy max with the
; operands in source order (A, B).
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp oge float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Unordered > compare: as with uge, the SI safe-math checks expect the legacy
; max with swapped operands (B, A).
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]

; VI-SAFE: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]


; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ugt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Ordered > compare on scalars.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]

; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Ordered > compare on <1 x float>; the checks expect the same code as the
; scalar ogt case. Note the loads here are not volatile.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]


; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ogt <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}

; Ordered > compare on <3 x float>: the checks expect exactly three max (or
; three compare + cndmask pairs on fiji safe-math) and no further max after
; them. There is no r600 check for this kernel.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32

; VI-SAFE: v_cmp_gt_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_gt_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE: v_cmp_gt_f32_e32
; VI-SAFE: v_cndmask_b32_e32
; VI-SAFE-NOT: v_cmp
; VI-SAFE-NOT: v_cndmask

; GCN-NONAN: v_max_f32_e32
; GCN-NONAN: v_max_f32_e32
; GCN-NONAN: v_max_f32_e32

; GCN-NOT: v_max
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ogt <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

; The compare result is also stored to %out1, so it has a use besides the
; select. On all amdgcn runs the checks require a plain compare + cndmask and
; no v_max_ instruction (presumably because of that extra use); the r600 run
; still expects MAX.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-NOT: v_max_
; GCN: v_cmp_gt_f32
; GCN-NEXT: v_cndmask_b32
; GCN-NOT: v_max_

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }