; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

declare float @llvm.fmuladd.f32(float, float, float)
declare double @llvm.fmuladd.f64(double, double, double)
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                         float addrspace(1)* %in2, float addrspace(1)* %in3) {
  %r0 = load float, float addrspace(1)* %in1
  %r1 = load float, float addrspace(1)* %in2
  %r2 = load float, float addrspace(1)* %in3
  %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_f64:
; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                         double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

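; a + a is exactly 2.0 * a, so the two fadd chains below should be
; selected the same way as the fmuladd cases above: a v_mac_f32 with
; the 2.0 inline immediate.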
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r0 = load volatile float, float addrspace(1)* %gep.0
  %r1 = load volatile float, float addrspace(1)* %gep.1

  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %add.0, %r1
  store float %add.1, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r0 = load volatile float, float addrspace(1)* %gep.0
  %r1 = load volatile float, float addrspace(1)* %gep.1

  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %r1, %add.0
  store float %add.1, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

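; A negated addend cannot fold into v_mac_f32, which reads its addend
; from the destination register, so this last case should select
; v_mad_f32 with a negate source modifier on [[R2]] instead.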
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r2.fneg = fsub float -0.000000e+00, %r2

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}