1; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s 2; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s 3 4; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s 5; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s 6 7; -------------------------------------------------------------------------------- 8; fadd tests 9; -------------------------------------------------------------------------------- 10 11; GCN-LABEL: {{^}}v_fneg_add_f32: 12; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 13; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 14 15; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 16; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] 17 18; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]] 19; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 20define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 21 %tid = call i32 @llvm.amdgcn.workitem.id.x() 22 %tid.ext = sext i32 %tid to i64 23 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 24 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 25 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 26 %a = load volatile float, float addrspace(1)* %a.gep 27 %b = load volatile float, float addrspace(1)* %b.gep 28 %add = fadd float %a, %b 29 %fneg = fneg float %add 30 store float %fneg, float addrspace(1)* %out.gep 31 ret void 32} 33 34; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32: 35; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 36; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 37; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 38; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]] 39; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]] 40; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 41define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 42 %tid = call i32 @llvm.amdgcn.workitem.id.x() 43 %tid.ext = sext i32 %tid to i64 44 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 45 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 46 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 47 %a = load volatile float, float addrspace(1)* %a.gep 48 %b = load volatile float, float addrspace(1)* %b.gep 49 %add = fadd float %a, %b 50 %fneg = fneg float %add 51 store volatile float %fneg, float addrspace(1)* %out 52 store volatile float %add, float addrspace(1)* %out 53 ret void 54} 55 56; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32: 57; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 58; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 59 60; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 61; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]] 62; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]] 63 64; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]] 65; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]] 66 67; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]] 68; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 69define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 70 %tid = call i32 @llvm.amdgcn.workitem.id.x() 71 %tid.ext = sext i32 %tid to i64 72 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 73 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 74 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 75 %a = load volatile float, float addrspace(1)* %a.gep 76 %b = load volatile float, float addrspace(1)* %b.gep 77 %add = fadd float %a, %b 78 %fneg = fneg float %add 79 %use1 = fmul float %add, 4.0 80 store volatile float %fneg, float addrspace(1)* %out 81 store volatile float %use1, float addrspace(1)* %out 82 ret void 83} 84 85; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32: 86; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 87; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 88 89; GCN-SAFE: v_sub_f32_e32 90; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000, 91 92; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 93 94; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 95define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 96 %tid = call i32 @llvm.amdgcn.workitem.id.x() 97 %tid.ext = sext i32 %tid to i64 98 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 99 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 100 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 101 %a = load volatile float, float addrspace(1)* %a.gep 102 %b = load volatile float, float addrspace(1)* %b.gep 103 %fneg.a = fneg float %a 104 %add = fadd float %fneg.a, %b 105 %fneg = fneg float %add 106 store volatile float %fneg, float addrspace(1)* %out 107 ret void 108} 109 110; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32: 111; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 112; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 113 114; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 115; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] 116 117; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 118; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 119define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 120 %tid = call i32 @llvm.amdgcn.workitem.id.x() 121 %tid.ext = sext i32 %tid to i64 122 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 123 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 124 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 125 %a = load volatile float, float addrspace(1)* %a.gep 126 %b = load volatile float, float addrspace(1)* %b.gep 127 %fneg.b = fneg float %b 128 %add = fadd float %a, %fneg.b 129 %fneg = fneg float %add 130 store volatile float %fneg, float addrspace(1)* %out 131 ret void 132} 133 134; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32: 135; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 136; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 137 138; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]] 139; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] 140 141; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 142; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 143define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 144 %tid = call i32 @llvm.amdgcn.workitem.id.x() 145 %tid.ext = sext i32 %tid to i64 146 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 147 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 148 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 149 %a = load volatile float, float addrspace(1)* %a.gep 150 %b = load volatile float, float addrspace(1)* %b.gep 151 %fneg.a = fneg float %a 152 %fneg.b = fneg float %b 153 %add = fadd float %fneg.a, %fneg.b 154 %fneg = fneg float %add 155 store volatile float %fneg, float addrspace(1)* %out 156 ret void 157} 158 159; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32: 160; GCN-SAFE-DAG: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}} 161; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 162; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 163 164; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[SIGNBIT]], [[A]] 165; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 166; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[SIGNBIT]], [[ADD]] 167 168; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 169; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]] 170; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]] 171; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 172define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 173 %tid = call i32 @llvm.amdgcn.workitem.id.x() 174 %tid.ext = sext i32 %tid to i64 175 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 176 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 177 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 178 %a = load volatile float, float addrspace(1)* %a.gep 179 %b = load volatile float, float addrspace(1)* %b.gep 180 %fneg.a = fneg float %a 181 %add = fadd float %fneg.a, %b 182 %fneg = fneg float %add 183 store volatile float %fneg, float addrspace(1)* %out 184 store volatile float %fneg.a, float addrspace(1)* %out 185 ret void 186} 187 188; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32: 189; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 190; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 191 192; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 193; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 194; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]] 195 196; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]] 197; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 198; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]] 199; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 200define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 { 201 %tid = call i32 @llvm.amdgcn.workitem.id.x() 202 %tid.ext = sext i32 %tid to i64 203 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 204 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 205 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 206 %a = load volatile float, float addrspace(1)* %a.gep 207 %b = load volatile float, float addrspace(1)* %b.gep 208 %fneg.a = fneg float %a 209 %add = fadd float %fneg.a, %b 210 %fneg = fneg float %add 211 %use1 = fmul float %fneg.a, %c 212 store volatile float %fneg, float addrspace(1)* %out 213 store volatile float %use1, float addrspace(1)* %out 214 ret void 215} 216 217; This one asserted with -enable-no-signed-zeros-fp-math 218; GCN-LABEL: {{^}}fneg_fadd_0: 219; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]], 220; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]] 221; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]] 222define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 { 223.entry: 224 %tmp7 = fdiv float 1.000000e+00, %tmp6 225 %tmp8 = fmul float 0.000000e+00, %tmp7 226 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 227 %.i188 = fadd float %tmp9, 0.000000e+00 228 %tmp10 = fcmp uge float %.i188, %tmp2 229 %tmp11 = fneg float %.i188 230 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 231 %tmp12 = fcmp ule float %.i092, 0.000000e+00 232 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 233 ret float %.i198 234} 235 236; This is a workaround because -enable-no-signed-zeros-fp-math does not set up 237; function attribute unsafe-fp-math automatically. Combine with the previous test 238; when that is done. 239; GCN-LABEL: {{^}}fneg_fadd_0_nsz: 240; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]], 241; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]], 242; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 243; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], 244; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]] 245define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 { 246.entry: 247 %tmp7 = fdiv float 1.000000e+00, %tmp6 248 %tmp8 = fmul float 0.000000e+00, %tmp7 249 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 250 %.i188 = fadd float %tmp9, 0.000000e+00 251 %tmp10 = fcmp uge float %.i188, %tmp2 252 %tmp11 = fneg float %.i188 253 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 254 %tmp12 = fcmp ule float %.i092, 0.000000e+00 255 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 256 ret float %.i198 257} 258 259; -------------------------------------------------------------------------------- 260; fmul tests 261; -------------------------------------------------------------------------------- 262 263; GCN-LABEL: {{^}}v_fneg_mul_f32: 264; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 265; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 266; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]] 267; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 268define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 269 %tid = call i32 @llvm.amdgcn.workitem.id.x() 270 %tid.ext = sext i32 %tid to i64 271 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 272 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 273 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 274 %a = load volatile float, float addrspace(1)* %a.gep 275 %b = load volatile float, float addrspace(1)* %b.gep 276 %mul = fmul float %a, %b 277 %fneg = fneg float %mul 278 store float %fneg, float addrspace(1)* %out.gep 279 ret void 280} 281 282; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32: 283; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 284; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 285; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 286; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]] 287; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]] 288; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 289define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 290 %tid = call i32 @llvm.amdgcn.workitem.id.x() 291 %tid.ext = sext i32 %tid to i64 292 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 293 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 294 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 295 %a = load volatile float, float addrspace(1)* %a.gep 296 %b = load volatile float, float addrspace(1)* %b.gep 297 %mul = fmul float %a, %b 298 %fneg = fneg float %mul 299 store volatile float %fneg, float addrspace(1)* %out 300 store volatile float %mul, float addrspace(1)* %out 301 ret void 302} 303 304; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32: 305; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 306; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 307; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]] 308; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]] 309 310; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]] 311; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 312define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 313 %tid = call i32 @llvm.amdgcn.workitem.id.x() 314 %tid.ext = sext i32 %tid to i64 315 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 316 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 317 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 318 %a = load volatile float, float addrspace(1)* %a.gep 319 %b = load volatile float, float addrspace(1)* %b.gep 320 %mul = fmul float %a, %b 321 %fneg = fneg float %mul 322 %use1 = fmul float %mul, 4.0 323 store volatile float %fneg, float addrspace(1)* %out 324 store volatile float %use1, float addrspace(1)* %out 325 ret void 326} 327 328; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32: 329; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 330; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 331; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 332; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 333define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 334 %tid = call i32 @llvm.amdgcn.workitem.id.x() 335 %tid.ext = sext i32 %tid to i64 336 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 337 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 338 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 339 %a = load volatile float, float addrspace(1)* %a.gep 340 %b = load volatile float, float addrspace(1)* %b.gep 341 %fneg.a = fneg float %a 342 %mul = fmul float %fneg.a, %b 343 %fneg = fneg float %mul 344 store volatile float %fneg, float addrspace(1)* %out 345 ret void 346} 347 348; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32: 349; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 350; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 351; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 352; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 353define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 354 %tid = call i32 @llvm.amdgcn.workitem.id.x() 355 %tid.ext = sext i32 %tid to i64 356 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 357 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 358 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 359 %a = load volatile float, float addrspace(1)* %a.gep 360 %b = load volatile float, float addrspace(1)* %b.gep 361 %fneg.b = fneg float %b 362 %mul = fmul float %a, %fneg.b 363 %fneg = fneg float %mul 364 store volatile float %fneg, float addrspace(1)* %out 365 ret void 366} 367 368; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32: 369; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 370; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 371; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]] 372; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 373define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 374 %tid = call i32 @llvm.amdgcn.workitem.id.x() 375 %tid.ext = sext i32 %tid to i64 376 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 377 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 378 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 379 %a = load volatile float, float addrspace(1)* %a.gep 380 %b = load volatile float, float addrspace(1)* %b.gep 381 %fneg.a = fneg float %a 382 %fneg.b = fneg float %b 383 %mul = fmul float %fneg.a, %fneg.b 384 %fneg = fneg float %mul 385 store volatile float %fneg, float addrspace(1)* %out 386 ret void 387} 388 389; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32: 390; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 391; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 392; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 393; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]] 394 395; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]] 396; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 397define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 398 %tid = call i32 @llvm.amdgcn.workitem.id.x() 399 %tid.ext = sext i32 %tid to i64 400 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 401 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 402 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 403 %a = load volatile float, float addrspace(1)* %a.gep 404 %b = load volatile float, float addrspace(1)* %b.gep 405 %fneg.a = fneg float %a 406 %mul = fmul float %fneg.a, %b 407 %fneg = fneg float %mul 408 store volatile float %fneg, float addrspace(1)* %out 409 store volatile float %fneg.a, float addrspace(1)* %out 410 ret void 411} 412 413; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32: 414; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 415; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 416; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]] 417; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 418; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]] 419; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 420define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 { 421 %tid = call i32 @llvm.amdgcn.workitem.id.x() 422 %tid.ext = sext i32 %tid to i64 423 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 424 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 425 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 426 %a = load volatile float, float addrspace(1)* %a.gep 427 %b = load volatile float, float addrspace(1)* %b.gep 428 %fneg.a = fneg float %a 429 %mul = fmul float %fneg.a, %b 430 %fneg = fneg float %mul 431 %use1 = fmul float %fneg.a, %c 432 store volatile float %fneg, float addrspace(1)* %out 433 store volatile float %use1, float addrspace(1)* %out 434 ret void 435} 436 437; -------------------------------------------------------------------------------- 438; fminnum tests 439; -------------------------------------------------------------------------------- 440 441; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee: 442; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 443; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 444; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 445; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]] 446; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]] 447; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 448define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 449 %tid = call i32 @llvm.amdgcn.workitem.id.x() 450 %tid.ext = sext i32 %tid to i64 451 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 452 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 453 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 454 %a = load volatile float, float addrspace(1)* %a.gep 455 %b = load volatile float, float addrspace(1)* %b.gep 456 %min = call float @llvm.minnum.f32(float %a, float %b) 457 %fneg = fneg float %min 458 store float %fneg, float addrspace(1)* %out.gep 459 ret void 460} 461 462; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee: 463; GCN-NOT: v0 464; GCN-NOT: v1 465; GCN: v_max_f32_e64 v0, -v0, -v1 466; GCN-NEXT: ; return 467define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 { 468 %min = call float @llvm.minnum.f32(float %a, float %b) 469 %fneg = fneg float %min 470 ret float %fneg 471} 472 473; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee: 474; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 475; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 476; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]] 477; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 478define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 479 %tid = call i32 @llvm.amdgcn.workitem.id.x() 480 %tid.ext = sext i32 %tid to i64 481 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 482 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 483 %a = load volatile float, float addrspace(1)* %a.gep 484 %min = call float @llvm.minnum.f32(float %a, float %a) 485 %min.fneg = fsub float -0.0, %min 486 store float %min.fneg, float addrspace(1)* %out.gep 487 ret void 488} 489 490; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee: 491; GCN-NOT: v0 492; GCN: v_max_f32_e64 v0, -v0, -v0 493; GCN-NEXT: ; return 494define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 { 495 %min = call float @llvm.minnum.f32(float %a, float %a) 496 %min.fneg = fsub float -0.0, %min 497 ret float %min.fneg 498} 499 500; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee: 501; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 502; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 503; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]] 504; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 505define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 506 %tid = call i32 @llvm.amdgcn.workitem.id.x() 507 %tid.ext = sext i32 %tid to i64 508 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 509 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 510 %a = load volatile float, float addrspace(1)* %a.gep 511 %min = call float @llvm.minnum.f32(float 4.0, float %a) 512 %fneg = fneg float %min 513 store float %fneg, float addrspace(1)* %out.gep 514 ret void 515} 516 517; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee: 518; GCN-NOT: v0 519; GCN: v_max_f32_e64 v0, -v0, -4.0 520; GCN-NEXT: ; return 521define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 { 522 %min = call float @llvm.minnum.f32(float 4.0, float %a) 523 %fneg = fneg float %min 524 ret float %fneg 525} 526 527; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee: 528; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 529; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 530; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]] 531; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 532define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 533 %tid = call i32 @llvm.amdgcn.workitem.id.x() 534 %tid.ext = sext i32 %tid to i64 535 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 536 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 537 %a = load volatile float, float addrspace(1)* %a.gep 538 %min = call float @llvm.minnum.f32(float -4.0, float %a) 539 %fneg = fneg float %min 540 store float %fneg, float addrspace(1)* %out.gep 541 ret void 542} 543 544; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee: 545; GCN-NOT: v0 546; GCN: v_max_f32_e64 v0, -v0, 4.0 547; GCN-NEXT: ; return 548define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 { 549 %min = call float @llvm.minnum.f32(float -4.0, float %a) 550 %fneg = fneg float %min 551 ret float %fneg 552} 553 554; GCN-LABEL: {{^}}v_fneg_0_minnum_f32: 555; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 556; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]] 557; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 558define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 559 %tid = call i32 @llvm.amdgcn.workitem.id.x() 560 %tid.ext = sext i32 %tid to i64 561 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 562 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 563 %a = load volatile float, float addrspace(1)* %a.gep 564 %min = call float @llvm.minnum.f32(float 0.0, float %a) 565 %fneg = fneg float %min 566 store float %fneg, float addrspace(1)* %out.gep 567 ret void 568} 569 570; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee: 571; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 572; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 573; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]] 574; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 575define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 576 %tid = call i32 @llvm.amdgcn.workitem.id.x() 577 %tid.ext = sext i32 %tid to i64 578 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 579 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 580 %a = load volatile float, float addrspace(1)* %a.gep 581 %min = call float @llvm.minnum.f32(float -0.0, float %a) 582 %fneg = fneg float %min 583 store float %fneg, float addrspace(1)* %out.gep 584 ret void 585} 586 587; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32: 588; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 589 590; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]] 591; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]] 592 593; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]] 594; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]] 595; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]] 596 597; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 598define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 599 %tid = call i32 @llvm.amdgcn.workitem.id.x() 600 %tid.ext = sext i32 %tid to i64 601 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 602 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 603 %a = load volatile float, float addrspace(1)* %a.gep 604 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) 605 %fneg = fneg float %min 606 store float %fneg, float addrspace(1)* %out.gep 607 ret void 608} 609 610; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32: 611; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 612 613; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]] 614; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]] 615 616; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]] 617; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]] 618 619; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 620define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 621 %tid = call i32 @llvm.amdgcn.workitem.id.x() 622 %tid.ext = sext i32 %tid to i64 623 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 624 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 625 %a = load volatile float, float addrspace(1)* %a.gep 626 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) 627 %fneg = fneg float %min 628 store float %fneg, float addrspace(1)* %out.gep 629 ret void 630} 631 632; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16: 633; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] 634 635; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] 636; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]] 637; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]] 638 639; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]] 640; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]] 641; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]] 642 643; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 644define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 645 %tid = call i32 @llvm.amdgcn.workitem.id.x() 646 %tid.ext = sext i32 %tid to i64 647 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 648 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 649 %a = load volatile half, half addrspace(1)* %a.gep 650 %min = call half @llvm.minnum.f16(half 0xH3118, half %a) 651 %fneg = fsub half -0.000000e+00, %min 652 store half %fneg, half addrspace(1)* %out.gep 653 ret void 654} 655 656; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16: 657; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] 658 659; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] 660; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]] 661; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]] 662 663; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]] 664; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]] 665 666; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 667define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 668 %tid = call i32 @llvm.amdgcn.workitem.id.x() 669 %tid.ext = sext i32 %tid to i64 670 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 671 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 672 %a = load volatile half, half addrspace(1)* %a.gep 673 %min = call half @llvm.minnum.f16(half 0xHB118, half %a) 674 %fneg = fsub half -0.000000e+00, %min 675 store half %fneg, half addrspace(1)* %out.gep 676 ret void 677} 678 679; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64: 680; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 681 682; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30 683; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882 684; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]] 685; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} 686 687; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494 688; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]] 689 690; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} 691define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 692 %tid = call i32 @llvm.amdgcn.workitem.id.x() 693 %tid.ext = sext i32 %tid to i64 694 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 695 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 696 %a = load volatile double, double addrspace(1)* %a.gep 697 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a) 698 %fneg = fsub double -0.000000e+00, %min 699 store double %fneg, double addrspace(1)* %out.gep 700 ret void 701} 702 703; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64: 704; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 705 706; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30 707; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882 708; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]] 709; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} 710 711; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]] 712; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494 713 714; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 715define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 716 %tid = call i32 @llvm.amdgcn.workitem.id.x() 717 %tid.ext = sext i32 %tid to i64 718 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 719 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 720 %a = load volatile double, double addrspace(1)* %a.gep 721 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a) 722 %fneg = fsub double -0.000000e+00, %min 723 store double %fneg, double addrspace(1)* %out.gep 724 ret void 725} 726 727; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee: 728; GCN-NOT: v0 729; GCN: v_max_f32_e64 v0, -v0, 0{{$}} 730; GCN-NEXT: ; return 731define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 { 732 %min = call float @llvm.minnum.f32(float -0.0, float %a) 733 %fneg = fneg float %min 734 ret float %fneg 735} 736 737; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee: 738; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 739; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 740; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]] 741; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]] 742; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]] 743; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 744define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 745 %tid = call i32 @llvm.amdgcn.workitem.id.x() 746 %tid.ext = sext i32 %tid to i64 747 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 748 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 749 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 750 %a = load volatile float, float addrspace(1)* %a.gep 751 %b = load volatile float, float addrspace(1)* %b.gep 752 %min = call float @llvm.minnum.f32(float 0.0, float %a) 753 %fneg = fneg float %min 754 %mul = fmul float %fneg, %b 755 store float %mul, float addrspace(1)* %out.gep 756 ret void 757} 758 759; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32: 760; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 761; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 762 763; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]] 764 765; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]] 766; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]] 767 768; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]] 769; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]] 770; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]] 771 772; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 773define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 774 %tid = call i32 @llvm.amdgcn.workitem.id.x() 775 %tid.ext = sext i32 %tid to i64 776 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 777 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 778 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 779 %a = load volatile float, float addrspace(1)* %a.gep 780 %b = load volatile float, float addrspace(1)* %b.gep 781 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) 782 %fneg = fneg float %min 783 %mul = fmul float %fneg, %b 784 store float %mul, float addrspace(1)* %out.gep 785 ret void 786} 787 788; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee: 789; GCN-NOT: v0 790; GCN-NOT: v1 791; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0 792; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1 793; GCN-NEXT: ; return 794define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { 795 %min = call float @llvm.minnum.f32(float 0.0, float %a) 796 %fneg = fneg float %min 797 %mul = fmul float %fneg, %b 798 ret float %mul 799} 800 801; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee: 802; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 803; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 804; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 805; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]] 806; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]] 807; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] 808; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]] 809; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 810define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 811 %tid = call i32 @llvm.amdgcn.workitem.id.x() 812 %tid.ext = sext i32 %tid to i64 813 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 814 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 815 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 816 %a = load volatile float, float addrspace(1)* %a.gep 817 %b = load volatile float, float addrspace(1)* %b.gep 818 %min = call float @llvm.minnum.f32(float %a, float %b) 819 %fneg = fneg float %min 820 %use1 = fmul float %min, 4.0 821 store volatile float %fneg, float addrspace(1)* %out 822 store volatile float %use1, float addrspace(1)* %out 823 ret void 824} 825 826; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee: 827; GCN-NOT: v0 828; GCN-NOT: v1 829; GCN: v_max_f32_e64 v0, -v0, -v1 830; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 831; GCN-NEXT: ; return 832define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 { 833 %min = call float @llvm.minnum.f32(float %a, float %b) 834 %fneg = fneg float %min 835 %use1 = fmul float %min, 4.0 836 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 837 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 838 ret <2 x float> %ins1 839} 840 841; -------------------------------------------------------------------------------- 842; fmaxnum tests 843; -------------------------------------------------------------------------------- 844 845 846; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee: 847; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 848; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 849; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 850; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]] 851; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]] 852; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 853define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 854 %tid = call i32 @llvm.amdgcn.workitem.id.x() 855 %tid.ext = sext i32 %tid to i64 856 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 857 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 858 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 859 %a = load volatile float, float addrspace(1)* %a.gep 860 %b = load volatile float, float addrspace(1)* %b.gep 861 %max = call float @llvm.maxnum.f32(float %a, float %b) 862 %fneg = fneg float %max 863 store float %fneg, float addrspace(1)* %out.gep 864 ret void 865} 866 867; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee: 868; GCN-NOT: v0 869; GCN-NOT: v1 870; GCN: v_min_f32_e64 v0, -v0, -v1 871; GCN-NEXT: ; return 872define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 { 873 %max = call float @llvm.maxnum.f32(float %a, float %b) 874 %fneg = fneg float %max 875 ret float %fneg 876} 877 878; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee: 879; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 880; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 881; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]] 882; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 883define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 884 %tid = call i32 @llvm.amdgcn.workitem.id.x() 885 %tid.ext = sext i32 %tid to i64 886 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 887 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 888 %a = load volatile float, float addrspace(1)* %a.gep 889 %max = call float @llvm.maxnum.f32(float %a, float %a) 890 %max.fneg = fsub float -0.0, %max 891 store float %max.fneg, float addrspace(1)* %out.gep 892 ret void 893} 894 895; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee: 896; GCN-NOT: v0 897; GCN: v_min_f32_e64 v0, -v0, -v0 898; GCN-NEXT: ; return 899define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 { 900 %max = call float @llvm.maxnum.f32(float %a, float %a) 901 %max.fneg = fsub float -0.0, %max 902 ret float %max.fneg 903} 904 905; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee: 906; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 907; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 908; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]] 909; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 910define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 911 %tid = call i32 @llvm.amdgcn.workitem.id.x() 912 %tid.ext = sext i32 %tid to i64 913 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 914 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 915 %a = load volatile float, float addrspace(1)* %a.gep 916 %max = call float @llvm.maxnum.f32(float 4.0, float %a) 917 %fneg = fneg float %max 918 store float %fneg, float addrspace(1)* %out.gep 919 ret void 920} 921 922; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee: 923; GCN-NOT: v0 924; GCN: v_min_f32_e64 v0, -v0, -4.0 925; GCN-NEXT: ; return 926define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 { 927 %max = call float @llvm.maxnum.f32(float 4.0, float %a) 928 %fneg = fneg float %max 929 ret float %fneg 930} 931 932; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee: 933; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 934; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 935; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]] 936; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 937define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 938 %tid = call i32 @llvm.amdgcn.workitem.id.x() 939 %tid.ext = sext i32 %tid to i64 940 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 941 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 942 %a = load volatile float, float addrspace(1)* %a.gep 943 %max = call float @llvm.maxnum.f32(float -4.0, float %a) 944 %fneg = fneg float %max 945 store float %fneg, float addrspace(1)* %out.gep 946 ret void 947} 948 949; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee: 950; GCN-NOT: v0 951; GCN: v_min_f32_e64 v0, -v0, 4.0 952; GCN-NEXT: ; return 953define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 { 954 %max = call float @llvm.maxnum.f32(float -4.0, float %a) 955 %fneg = fneg float %max 956 ret float %fneg 957} 958 959; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32: 960; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 961; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]] 962; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 963define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 964 %tid = call i32 @llvm.amdgcn.workitem.id.x() 965 %tid.ext = sext i32 %tid to i64 966 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 967 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 968 %a = load volatile float, float addrspace(1)* %a.gep 969 %max = call float @llvm.maxnum.f32(float 0.0, float %a) 970 %fneg = fneg float %max 971 store float %fneg, float addrspace(1)* %out.gep 972 ret void 973} 974 975; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee: 976; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 977; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]] 978; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]] 979; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 980define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 981 %tid = call i32 @llvm.amdgcn.workitem.id.x() 982 %tid.ext = sext i32 %tid to i64 983 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 984 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 985 %a = load volatile float, float addrspace(1)* %a.gep 986 %max = call float @llvm.maxnum.f32(float -0.0, float %a) 987 %fneg = fneg float %max 988 store float %fneg, float addrspace(1)* %out.gep 989 ret void 990} 991 992; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee: 993; GCN-NOT: v0 994; GCN: v_min_f32_e64 v0, -v0, 0{{$}} 995; GCN-NEXT: ; return 996define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 { 997 %max = call float @llvm.maxnum.f32(float -0.0, float %a) 998 %fneg = fneg float %max 999 ret float %fneg 1000} 1001 1002; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee: 1003; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1004; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1005; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]] 1006; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]] 1007; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]] 1008; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1009define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1010 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1011 %tid.ext = sext i32 %tid to i64 1012 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1013 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1014 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1015 %a = load volatile float, float addrspace(1)* %a.gep 1016 %b = load volatile float, float addrspace(1)* %b.gep 1017 %max = call float @llvm.maxnum.f32(float 0.0, float %a) 1018 %fneg = fneg float %max 1019 %mul = fmul float %fneg, %b 1020 store float %mul, float addrspace(1)* %out.gep 1021 ret void 1022} 1023 1024; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee: 1025; GCN-NOT: v0 1026; GCN-NOT: v1 1027; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0 1028; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1 1029; GCN-NEXT: ; return 1030define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { 1031 %max = call float @llvm.maxnum.f32(float 0.0, float %a) 1032 %fneg = fneg float %max 1033 %mul = fmul float %fneg, %b 1034 ret float %mul 1035} 1036 1037; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee: 1038; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1039; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1040; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]] 1041; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]] 1042; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]] 1043; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]] 1044; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]] 1045; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 1046define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1047 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1048 %tid.ext = sext i32 %tid to i64 1049 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1050 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1051 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1052 %a = load volatile float, float addrspace(1)* %a.gep 1053 %b = load volatile float, float addrspace(1)* %b.gep 1054 %max = call float @llvm.maxnum.f32(float %a, float %b) 1055 %fneg = fneg float %max 1056 %use1 = fmul float %max, 4.0 1057 store volatile float %fneg, float addrspace(1)* %out 1058 store volatile float %use1, float addrspace(1)* %out 1059 ret void 1060} 1061 1062; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee: 1063; GCN-NOT: v0 1064; GCN-NOT: v1 1065; GCN: v_min_f32_e64 v0, -v0, -v1 1066; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 1067; GCN-NEXT: ; return 1068define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 { 1069 %max = call float @llvm.maxnum.f32(float %a, float %b) 1070 %fneg = fneg float %max 1071 %use1 = fmul float %max, 4.0 1072 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 1073 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 1074 ret <2 x float> %ins1 1075} 1076 1077; -------------------------------------------------------------------------------- 1078; fma tests 1079; -------------------------------------------------------------------------------- 1080 1081; GCN-LABEL: {{^}}v_fneg_fma_f32: 1082; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1083; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1084; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1085 1086; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 1087; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]] 1088 1089; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]] 1090; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1091define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1092 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1093 %tid.ext = sext i32 %tid to i64 1094 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1095 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1096 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1097 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1098 %a = load volatile float, float addrspace(1)* %a.gep 1099 %b = load volatile float, float addrspace(1)* %b.gep 1100 %c = load volatile float, float addrspace(1)* %c.gep 1101 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1102 %fneg = fneg float %fma 1103 store float %fneg, float addrspace(1)* %out.gep 1104 ret void 1105} 1106 1107; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32: 1108; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1109; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1110; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1111; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 1112; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]] 1113; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]] 1114; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1115define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1116 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1117 %tid.ext = sext i32 %tid to i64 1118 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1119 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1120 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1121 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1122 %a = load volatile float, float addrspace(1)* %a.gep 1123 %b = load volatile float, float addrspace(1)* %b.gep 1124 %c = load volatile float, float addrspace(1)* %c.gep 1125 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1126 %fneg = fneg float %fma 1127 store volatile float %fneg, float addrspace(1)* %out 1128 store volatile float %fma, float addrspace(1)* %out 1129 ret void 1130} 1131 1132; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32: 1133; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1134; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1135; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1136 1137; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 1138; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]] 1139; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]] 1140 1141; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] 1142; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]] 1143 1144; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]] 1145; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1146define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1147 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1148 %tid.ext = sext i32 %tid to i64 1149 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1150 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1151 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1152 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1153 %a = load volatile float, float addrspace(1)* %a.gep 1154 %b = load volatile float, float addrspace(1)* %b.gep 1155 %c = load volatile float, float addrspace(1)* %c.gep 1156 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1157 %fneg = fneg float %fma 1158 %use1 = fmul float %fma, 4.0 1159 store volatile float %fneg, float addrspace(1)* %out 1160 store volatile float %use1, float addrspace(1)* %out 1161 ret void 1162} 1163 1164; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32: 1165; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1166; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1167; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1168 1169; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]] 1170; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 1171 1172; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 1173; GCN-NSZ-NOT: [[FMA]] 1174; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1175define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1176 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1177 %tid.ext = sext i32 %tid to i64 1178 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1179 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1180 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1181 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1182 %a = load volatile float, float addrspace(1)* %a.gep 1183 %b = load volatile float, float addrspace(1)* %b.gep 1184 %c = load volatile float, float addrspace(1)* %c.gep 1185 %fneg.a = fneg float %a 1186 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1187 %fneg = fneg float %fma 1188 store volatile float %fneg, float addrspace(1)* %out 1189 ret void 1190} 1191 1192; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32: 1193; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1194; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1195; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1196 1197; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]] 1198; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 1199 1200; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 1201; GCN-NSZ-NOT: [[FMA]] 1202; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1203define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1204 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1205 %tid.ext = sext i32 %tid to i64 1206 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1207 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1208 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1209 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1210 %a = load volatile float, float addrspace(1)* %a.gep 1211 %b = load volatile float, float addrspace(1)* %b.gep 1212 %c = load volatile float, float addrspace(1)* %c.gep 1213 %fneg.b = fneg float %b 1214 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) 1215 %fneg = fneg float %fma 1216 store volatile float %fneg, float addrspace(1)* %out 1217 ret void 1218} 1219 1220; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32: 1221; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1222; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1223; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1224 1225; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 1226; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] 1227 1228; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] 1229; GCN-NSZ-NOT: [[FMA]] 1230; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1231define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1232 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1233 %tid.ext = sext i32 %tid to i64 1234 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1235 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1236 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1237 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1238 %a = load volatile float, float addrspace(1)* %a.gep 1239 %b = load volatile float, float addrspace(1)* %b.gep 1240 %c = load volatile float, float addrspace(1)* %c.gep 1241 %fneg.a = fneg float %a 1242 %fneg.b = fneg float %b 1243 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) 1244 %fneg = fneg float %fma 1245 store volatile float %fneg, float addrspace(1)* %out 1246 ret void 1247} 1248 1249; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32: 1250; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1251; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1252; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1253 1254; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]] 1255; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] 1256 1257; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 1258; GCN-NSZ-NOT: [[FMA]] 1259; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1260define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1261 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1262 %tid.ext = sext i32 %tid to i64 1263 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1264 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1265 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1266 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1267 %a = load volatile float, float addrspace(1)* %a.gep 1268 %b = load volatile float, float addrspace(1)* %b.gep 1269 %c = load volatile float, float addrspace(1)* %c.gep 1270 %fneg.a = fneg float %a 1271 %fneg.c = fneg float %c 1272 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) 1273 %fneg = fneg float %fma 1274 store volatile float %fneg, float addrspace(1)* %out 1275 ret void 1276} 1277 1278; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32: 1279; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1280; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1281; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1282 1283; GCN-NSZ-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 1284; GCN-NSZ-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 1285 1286; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]] 1287; GCN-NSZ-NOT: [[FMA]] 1288; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1289define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1290 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1291 %tid.ext = sext i32 %tid to i64 1292 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1293 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1294 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1295 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1296 %a = load volatile float, float addrspace(1)* %a.gep 1297 %b = load volatile float, float addrspace(1)* %b.gep 1298 %c = load volatile float, float addrspace(1)* %c.gep 1299 %fneg.c = fneg float %c 1300 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) 1301 %fneg = fneg float %fma 1302 store volatile float %fneg, float addrspace(1)* %out 1303 ret void 1304} 1305 1306; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32: 1307; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1308; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1309; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1310 1311; GCN-SAFE: v_xor_b32 1312; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], 1313; GCN-SAFE: v_xor_b32 1314 1315; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 1316; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 1317 1318; GCN-NSZ-NOT: [[FMA]] 1319; GCN-NSZ-NOT: [[NEG_A]] 1320; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] 1321; GCN-NSZ-NOT: [[NEG_A]] 1322; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 1323define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1324 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1325 %tid.ext = sext i32 %tid to i64 1326 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1327 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1328 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1329 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1330 %a = load volatile float, float addrspace(1)* %a.gep 1331 %b = load volatile float, float addrspace(1)* %b.gep 1332 %c = load volatile float, float addrspace(1)* %c.gep 1333 %fneg.a = fneg float %a 1334 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1335 %fneg = fneg float %fma 1336 store volatile float %fneg, float addrspace(1)* %out 1337 store volatile float %fneg.a, float addrspace(1)* %out 1338 ret void 1339} 1340 1341; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32: 1342; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1343; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1344; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1345 1346; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 1347; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]] 1348; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 1349 1350; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 1351; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]] 1352; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1353define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 { 1354 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1355 %tid.ext = sext i32 %tid to i64 1356 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1357 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1358 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1359 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1360 %a = load volatile float, float addrspace(1)* %a.gep 1361 %b = load volatile float, float addrspace(1)* %b.gep 1362 %c = load volatile float, float addrspace(1)* %c.gep 1363 %fneg.a = fneg float %a 1364 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1365 %fneg = fneg float %fma 1366 %use1 = fmul float %fneg.a, %d 1367 store volatile float %fneg, float addrspace(1)* %out 1368 store volatile float %use1, float addrspace(1)* %out 1369 ret void 1370} 1371 1372; -------------------------------------------------------------------------------- 1373; fmad tests 1374; -------------------------------------------------------------------------------- 1375 1376; GCN-LABEL: {{^}}v_fneg_fmad_f32: 1377; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1378; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1379; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1380 1381; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]] 1382; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]] 1383 1384; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]] 1385; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1386define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1387 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1388 %tid.ext = sext i32 %tid to i64 1389 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1390 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1391 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1392 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1393 %a = load volatile float, float addrspace(1)* %a.gep 1394 %b = load volatile float, float addrspace(1)* %b.gep 1395 %c = load volatile float, float addrspace(1)* %c.gep 1396 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 1397 %fneg = fneg float %fma 1398 store float %fneg, float addrspace(1)* %out.gep 1399 ret void 1400} 1401 1402; GCN-LABEL: {{^}}v_fneg_fmad_v4f32: 1403 1404; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} 1405; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} 1406; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} 1407; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} 1408define amdgpu_kernel void @v_fneg_fmad_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %a.ptr, <4 x float> addrspace(1)* %b.ptr, <4 x float> addrspace(1)* %c.ptr) #0 { 1409 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1410 %tid.ext = sext i32 %tid to i64 1411 %a.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a.ptr, i64 %tid.ext 1412 %b.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %b.ptr, i64 %tid.ext 1413 %c.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %c.ptr, i64 %tid.ext 1414 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext 1415 %a = load volatile <4 x float>, <4 x float> addrspace(1)* %a.gep 1416 %b = load volatile <4 x float>, <4 x float> addrspace(1)* %b.gep 1417 %c = load volatile <4 x float>, <4 x float> addrspace(1)* %c.gep 1418 %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) 1419 %fneg = fneg <4 x float> %fma 1420 store <4 x float> %fneg, <4 x float> addrspace(1)* %out.gep 1421 ret void 1422} 1423 1424; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32: 1425; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1426; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1427; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1428 1429; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]] 1430; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]] 1431; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]] 1432 1433; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]] 1434; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]] 1435 1436; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]] 1437; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1438define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1439 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1440 %tid.ext = sext i32 %tid to i64 1441 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1442 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1443 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1444 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1445 %a = load volatile float, float addrspace(1)* %a.gep 1446 %b = load volatile float, float addrspace(1)* %b.gep 1447 %c = load volatile float, float addrspace(1)* %c.gep 1448 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 1449 %fneg = fneg float %fma 1450 %use1 = fmul float %fma, 4.0 1451 store volatile float %fneg, float addrspace(1)* %out 1452 store volatile float %use1, float addrspace(1)* %out 1453 ret void 1454} 1455 1456; -------------------------------------------------------------------------------- 1457; fp_extend tests 1458; -------------------------------------------------------------------------------- 1459 1460; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64: 1461; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1462; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]] 1463; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1464define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1465 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1466 %tid.ext = sext i32 %tid to i64 1467 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1468 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1469 %a = load volatile float, float addrspace(1)* %a.gep 1470 %fpext = fpext float %a to double 1471 %fneg = fsub double -0.000000e+00, %fpext 1472 store double %fneg, double addrspace(1)* %out.gep 1473 ret void 1474} 1475 1476; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64: 1477; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1478; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]] 1479; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1480define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1481 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1482 %tid.ext = sext i32 %tid to i64 1483 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1484 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1485 %a = load volatile float, float addrspace(1)* %a.gep 1486 %fneg.a = fneg float %a 1487 %fpext = fpext float %fneg.a to double 1488 %fneg = fsub double -0.000000e+00, %fpext 1489 store double %fneg, double addrspace(1)* %out.gep 1490 ret void 1491} 1492 1493; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64: 1494; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1495; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]] 1496; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]] 1497; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1498; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]] 1499define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1500 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1501 %tid.ext = sext i32 %tid to i64 1502 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1503 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1504 %a = load volatile float, float addrspace(1)* %a.gep 1505 %fneg.a = fneg float %a 1506 %fpext = fpext float %fneg.a to double 1507 %fneg = fsub double -0.000000e+00, %fpext 1508 store volatile double %fneg, double addrspace(1)* %out.gep 1509 store volatile float %fneg.a, float addrspace(1)* undef 1510 ret void 1511} 1512 1513; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64: 1514; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1515; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]] 1516; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]] 1517; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}} 1518; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}} 1519define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1520 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1521 %tid.ext = sext i32 %tid to i64 1522 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1523 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1524 %a = load volatile float, float addrspace(1)* %a.gep 1525 %fpext = fpext float %a to double 1526 %fneg = fsub double -0.000000e+00, %fpext 1527 store volatile double %fneg, double addrspace(1)* %out.gep 1528 store volatile double %fpext, double addrspace(1)* undef 1529 ret void 1530} 1531 1532; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64: 1533; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1534; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]] 1535; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]] 1536; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0 1537; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}} 1538; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1539define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1540 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1541 %tid.ext = sext i32 %tid to i64 1542 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1543 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1544 %a = load volatile float, float addrspace(1)* %a.gep 1545 %fpext = fpext float %a to double 1546 %fneg = fsub double -0.000000e+00, %fpext 1547 %mul = fmul double %fpext, 4.0 1548 store volatile double %fneg, double addrspace(1)* %out.gep 1549 store volatile double %mul, double addrspace(1)* %out.gep 1550 ret void 1551} 1552 1553; FIXME: Source modifiers not folded for f16->f32 1554; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32: 1555define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 1556 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1557 %tid.ext = sext i32 %tid to i64 1558 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 1559 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1560 %a = load volatile half, half addrspace(1)* %a.gep 1561 %fpext = fpext half %a to float 1562 %fneg = fneg float %fpext 1563 store volatile float %fneg, float addrspace(1)* %out.gep 1564 store volatile float %fpext, float addrspace(1)* %out.gep 1565 ret void 1566} 1567 1568; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32: 1569define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 1570 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1571 %tid.ext = sext i32 %tid to i64 1572 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 1573 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1574 %a = load volatile half, half addrspace(1)* %a.gep 1575 %fpext = fpext half %a to float 1576 %fneg = fneg float %fpext 1577 %mul = fmul float %fpext, 4.0 1578 store volatile float %fneg, float addrspace(1)* %out.gep 1579 store volatile float %mul, float addrspace(1)* %out.gep 1580 ret void 1581} 1582 1583; -------------------------------------------------------------------------------- 1584; fp_round tests 1585; -------------------------------------------------------------------------------- 1586 1587; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32: 1588; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1589; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]] 1590; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1591define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1592 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1593 %tid.ext = sext i32 %tid to i64 1594 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1595 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1596 %a = load volatile double, double addrspace(1)* %a.gep 1597 %fpround = fptrunc double %a to float 1598 %fneg = fneg float %fpround 1599 store float %fneg, float addrspace(1)* %out.gep 1600 ret void 1601} 1602 1603; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32: 1604; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1605; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 1606; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1607define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1608 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1609 %tid.ext = sext i32 %tid to i64 1610 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1611 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1612 %a = load volatile double, double addrspace(1)* %a.gep 1613 %fneg.a = fsub double -0.000000e+00, %a 1614 %fpround = fptrunc double %fneg.a to float 1615 %fneg = fneg float %fpround 1616 store float %fneg, float addrspace(1)* %out.gep 1617 ret void 1618} 1619 1620; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32: 1621; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}} 1622; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}} 1623; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]] 1624; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1625; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}} 1626define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1627 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1628 %tid.ext = sext i32 %tid to i64 1629 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1630 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1631 %a = load volatile double, double addrspace(1)* %a.gep 1632 %fneg.a = fsub double -0.000000e+00, %a 1633 %fpround = fptrunc double %fneg.a to float 1634 %fneg = fneg float %fpround 1635 store volatile float %fneg, float addrspace(1)* %out.gep 1636 store volatile double %fneg.a, double addrspace(1)* undef 1637 ret void 1638} 1639 1640; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32: 1641; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1642; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 1643; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}} 1644 1645; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1646; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]] 1647define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 { 1648 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1649 %tid.ext = sext i32 %tid to i64 1650 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1651 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1652 %a = load volatile double, double addrspace(1)* %a.gep 1653 %fneg.a = fsub double -0.000000e+00, %a 1654 %fpround = fptrunc double %fneg.a to float 1655 %fneg = fneg float %fpround 1656 %use1 = fmul double %fneg.a, %c 1657 store volatile float %fneg, float addrspace(1)* %out.gep 1658 store volatile double %use1, double addrspace(1)* undef 1659 ret void 1660} 1661 1662; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16: 1663; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1664; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 1665; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1666define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1667 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1668 %tid.ext = sext i32 %tid to i64 1669 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1670 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 1671 %a = load volatile float, float addrspace(1)* %a.gep 1672 %fpround = fptrunc float %a to half 1673 %fneg = fsub half -0.000000e+00, %fpround 1674 store half %fneg, half addrspace(1)* %out.gep 1675 ret void 1676} 1677 1678; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16: 1679; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1680; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1681; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1682define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1683 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1684 %tid.ext = sext i32 %tid to i64 1685 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1686 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 1687 %a = load volatile float, float addrspace(1)* %a.gep 1688 %fneg.a = fneg float %a 1689 %fpround = fptrunc float %fneg.a to half 1690 %fneg = fsub half -0.000000e+00, %fpround 1691 store half %fneg, half addrspace(1)* %out.gep 1692 ret void 1693} 1694 1695; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32: 1696; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1697; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]] 1698; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]] 1699; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]] 1700; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]] 1701define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1702 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1703 %tid.ext = sext i32 %tid to i64 1704 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1705 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1706 %a = load volatile double, double addrspace(1)* %a.gep 1707 %fpround = fptrunc double %a to float 1708 %fneg = fneg float %fpround 1709 store volatile float %fneg, float addrspace(1)* %out.gep 1710 store volatile float %fpround, float addrspace(1)* %out.gep 1711 ret void 1712} 1713 1714; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16: 1715; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1716; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1717; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 1718; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1719; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 1720define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1721 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1722 %tid.ext = sext i32 %tid to i64 1723 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1724 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 1725 %a = load volatile float, float addrspace(1)* %a.gep 1726 %fneg.a = fneg float %a 1727 %fpround = fptrunc float %fneg.a to half 1728 %fneg = fsub half -0.000000e+00, %fpround 1729 store volatile half %fneg, half addrspace(1)* %out.gep 1730 store volatile float %fneg.a, float addrspace(1)* undef 1731 ret void 1732} 1733 1734; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16: 1735; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1736; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1737; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s 1738; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1739; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]] 1740define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 { 1741 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1742 %tid.ext = sext i32 %tid to i64 1743 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1744 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 1745 %a = load volatile float, float addrspace(1)* %a.gep 1746 %fneg.a = fneg float %a 1747 %fpround = fptrunc float %fneg.a to half 1748 %fneg = fsub half -0.000000e+00, %fpround 1749 %use1 = fmul float %fneg.a, %c 1750 store volatile half %fneg, half addrspace(1)* %out.gep 1751 store volatile float %use1, float addrspace(1)* undef 1752 ret void 1753} 1754 1755; -------------------------------------------------------------------------------- 1756; rcp tests 1757; -------------------------------------------------------------------------------- 1758 1759; GCN-LABEL: {{^}}v_fneg_rcp_f32: 1760; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1761; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 1762; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1763define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1764 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1765 %tid.ext = sext i32 %tid to i64 1766 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1767 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1768 %a = load volatile float, float addrspace(1)* %a.gep 1769 %rcp = call float @llvm.amdgcn.rcp.f32(float %a) 1770 %fneg = fneg float %rcp 1771 store float %fneg, float addrspace(1)* %out.gep 1772 ret void 1773} 1774 1775; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32: 1776; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1777; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1778; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1779define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1780 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1781 %tid.ext = sext i32 %tid to i64 1782 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1783 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1784 %a = load volatile float, float addrspace(1)* %a.gep 1785 %fneg.a = fneg float %a 1786 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1787 %fneg = fneg float %rcp 1788 store float %fneg, float addrspace(1)* %out.gep 1789 ret void 1790} 1791 1792; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32: 1793; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1794; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1795; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 1796; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1797; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 1798define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1799 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1800 %tid.ext = sext i32 %tid to i64 1801 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1802 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1803 %a = load volatile float, float addrspace(1)* %a.gep 1804 %fneg.a = fneg float %a 1805 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1806 %fneg = fneg float %rcp 1807 store volatile float %fneg, float addrspace(1)* %out.gep 1808 store volatile float %fneg.a, float addrspace(1)* undef 1809 ret void 1810} 1811 1812; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32: 1813; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1814; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1815; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 1816; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1817; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1818define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 { 1819 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1820 %tid.ext = sext i32 %tid to i64 1821 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1822 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1823 %a = load volatile float, float addrspace(1)* %a.gep 1824 %fneg.a = fneg float %a 1825 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1826 %fneg = fneg float %rcp 1827 %use1 = fmul float %fneg.a, %c 1828 store volatile float %fneg, float addrspace(1)* %out.gep 1829 store volatile float %use1, float addrspace(1)* undef 1830 ret void 1831} 1832 1833; -------------------------------------------------------------------------------- 1834; fmul_legacy tests 1835; -------------------------------------------------------------------------------- 1836 1837; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32: 1838; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1839; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1840; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]] 1841; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 1842define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1843 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1844 %tid.ext = sext i32 %tid to i64 1845 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1846 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1847 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1848 %a = load volatile float, float addrspace(1)* %a.gep 1849 %b = load volatile float, float addrspace(1)* %b.gep 1850 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1851 %fneg = fneg float %mul 1852 store float %fneg, float addrspace(1)* %out.gep 1853 ret void 1854} 1855 1856; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32: 1857; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1858; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1859; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 1860; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]] 1861; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]] 1862; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 1863define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1864 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1865 %tid.ext = sext i32 %tid to i64 1866 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1867 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1868 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1869 %a = load volatile float, float addrspace(1)* %a.gep 1870 %b = load volatile float, float addrspace(1)* %b.gep 1871 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1872 %fneg = fneg float %mul 1873 store volatile float %fneg, float addrspace(1)* %out 1874 store volatile float %mul, float addrspace(1)* %out 1875 ret void 1876} 1877 1878; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32: 1879; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1880; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1881; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]] 1882; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0 1883; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 1884; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1885define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1886 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1887 %tid.ext = sext i32 %tid to i64 1888 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1889 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1890 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1891 %a = load volatile float, float addrspace(1)* %a.gep 1892 %b = load volatile float, float addrspace(1)* %b.gep 1893 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1894 %fneg = fneg float %mul 1895 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) 1896 store volatile float %fneg, float addrspace(1)* %out 1897 store volatile float %use1, float addrspace(1)* %out 1898 ret void 1899} 1900 1901; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32: 1902; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1903; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1904; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 1905; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 1906define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1907 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1908 %tid.ext = sext i32 %tid to i64 1909 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1910 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1911 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1912 %a = load volatile float, float addrspace(1)* %a.gep 1913 %b = load volatile float, float addrspace(1)* %b.gep 1914 %fneg.a = fneg float %a 1915 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 1916 %fneg = fneg float %mul 1917 store volatile float %fneg, float addrspace(1)* %out 1918 ret void 1919} 1920 1921; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32: 1922; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1923; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1924; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]] 1925; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 1926define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1927 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1928 %tid.ext = sext i32 %tid to i64 1929 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1930 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1931 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1932 %a = load volatile float, float addrspace(1)* %a.gep 1933 %b = load volatile float, float addrspace(1)* %b.gep 1934 %fneg.b = fneg float %b 1935 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) 1936 %fneg = fneg float %mul 1937 store volatile float %fneg, float addrspace(1)* %out 1938 ret void 1939} 1940 1941; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32: 1942; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1943; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1944; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]] 1945; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] 1946define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1947 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1948 %tid.ext = sext i32 %tid to i64 1949 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1950 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1951 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1952 %a = load volatile float, float addrspace(1)* %a.gep 1953 %b = load volatile float, float addrspace(1)* %b.gep 1954 %fneg.a = fneg float %a 1955 %fneg.b = fneg float %b 1956 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) 1957 %fneg = fneg float %mul 1958 store volatile float %fneg, float addrspace(1)* %out 1959 ret void 1960} 1961 1962; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32: 1963; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1964; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1965; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 1966; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]] 1967; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]] 1968; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]] 1969define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1970 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1971 %tid.ext = sext i32 %tid to i64 1972 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1973 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1974 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1975 %a = load volatile float, float addrspace(1)* %a.gep 1976 %b = load volatile float, float addrspace(1)* %b.gep 1977 %fneg.a = fneg float %a 1978 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 1979 %fneg = fneg float %mul 1980 store volatile float %fneg, float addrspace(1)* %out 1981 store volatile float %fneg.a, float addrspace(1)* %out 1982 ret void 1983} 1984 1985; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32: 1986; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1987; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1988; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]] 1989; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 1990; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]] 1991; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 1992define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 { 1993 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1994 %tid.ext = sext i32 %tid to i64 1995 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1996 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1997 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1998 %a = load volatile float, float addrspace(1)* %a.gep 1999 %b = load volatile float, float addrspace(1)* %b.gep 2000 %fneg.a = fneg float %a 2001 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 2002 %fneg = fneg float %mul 2003 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) 2004 store volatile float %fneg, float addrspace(1)* %out 2005 store volatile float %use1, float addrspace(1)* %out 2006 ret void 2007} 2008 2009; -------------------------------------------------------------------------------- 2010; sin tests 2011; -------------------------------------------------------------------------------- 2012 2013; GCN-LABEL: {{^}}v_fneg_sin_f32: 2014; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2015; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]] 2016; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]] 2017; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]] 2018; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2019define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2020 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2021 %tid.ext = sext i32 %tid to i64 2022 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2023 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2024 %a = load volatile float, float addrspace(1)* %a.gep 2025 %sin = call float @llvm.sin.f32(float %a) 2026 %fneg = fneg float %sin 2027 store float %fneg, float addrspace(1)* %out.gep 2028 ret void 2029} 2030 2031; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32: 2032; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2033; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 2034; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2035define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2036 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2037 %tid.ext = sext i32 %tid to i64 2038 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2039 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2040 %a = load volatile float, float addrspace(1)* %a.gep 2041 %sin = call float @llvm.amdgcn.sin.f32(float %a) 2042 %fneg = fsub float -0.0, %sin 2043 store float %fneg, float addrspace(1)* %out.gep 2044 ret void 2045} 2046 2047; -------------------------------------------------------------------------------- 2048; ftrunc tests 2049; -------------------------------------------------------------------------------- 2050 2051; GCN-LABEL: {{^}}v_fneg_trunc_f32: 2052; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2053; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 2054; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2055define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2056 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2057 %tid.ext = sext i32 %tid to i64 2058 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2059 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2060 %a = load volatile float, float addrspace(1)* %a.gep 2061 %trunc = call float @llvm.trunc.f32(float %a) 2062 %fneg = fsub float -0.0, %trunc 2063 store float %fneg, float addrspace(1)* %out.gep 2064 ret void 2065} 2066 2067; -------------------------------------------------------------------------------- 2068; fround tests 2069; -------------------------------------------------------------------------------- 2070 2071; GCN-LABEL: {{^}}v_fneg_round_f32: 2072; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2073; GCN: v_trunc_f32_e32 2074; GCN: v_sub_f32_e32 2075; GCN: v_cndmask_b32 2076 2077; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} 2078; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]] 2079 2080; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}} 2081; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2082define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2083 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2084 %tid.ext = sext i32 %tid to i64 2085 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2086 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2087 %a = load volatile float, float addrspace(1)* %a.gep 2088 %round = call float @llvm.round.f32(float %a) 2089 %fneg = fsub float -0.0, %round 2090 store float %fneg, float addrspace(1)* %out.gep 2091 ret void 2092} 2093 2094; -------------------------------------------------------------------------------- 2095; rint tests 2096; -------------------------------------------------------------------------------- 2097 2098; GCN-LABEL: {{^}}v_fneg_rint_f32: 2099; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2100; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 2101; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2102define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2103 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2104 %tid.ext = sext i32 %tid to i64 2105 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2106 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2107 %a = load volatile float, float addrspace(1)* %a.gep 2108 %rint = call float @llvm.rint.f32(float %a) 2109 %fneg = fsub float -0.0, %rint 2110 store float %fneg, float addrspace(1)* %out.gep 2111 ret void 2112} 2113 2114; -------------------------------------------------------------------------------- 2115; nearbyint tests 2116; -------------------------------------------------------------------------------- 2117 2118; GCN-LABEL: {{^}}v_fneg_nearbyint_f32: 2119; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2120; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 2121; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2122define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2123 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2124 %tid.ext = sext i32 %tid to i64 2125 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2126 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2127 %a = load volatile float, float addrspace(1)* %a.gep 2128 %nearbyint = call float @llvm.nearbyint.f32(float %a) 2129 %fneg = fsub float -0.0, %nearbyint 2130 store float %fneg, float addrspace(1)* %out.gep 2131 ret void 2132} 2133 2134; -------------------------------------------------------------------------------- 2135; fcanonicalize tests 2136; -------------------------------------------------------------------------------- 2137 2138; GCN-LABEL: {{^}}v_fneg_canonicalize_f32: 2139; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2140; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]] 2141; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] 2142define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 2143 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2144 %tid.ext = sext i32 %tid to i64 2145 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2146 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2147 %a = load volatile float, float addrspace(1)* %a.gep 2148 %trunc = call float @llvm.canonicalize.f32(float %a) 2149 %fneg = fsub float -0.0, %trunc 2150 store float %fneg, float addrspace(1)* %out.gep 2151 ret void 2152} 2153 2154; -------------------------------------------------------------------------------- 2155; vintrp tests 2156; -------------------------------------------------------------------------------- 2157 2158; GCN-LABEL: {{^}}v_fneg_interp_p1_f32: 2159; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2160; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2161; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]] 2162; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]] 2163; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]] 2164define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 2165 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2166 %tid.ext = sext i32 %tid to i64 2167 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2168 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2169 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2170 %a = load volatile float, float addrspace(1)* %a.gep 2171 %b = load volatile float, float addrspace(1)* %b.gep 2172 %mul = fmul float %a, %b 2173 %fneg = fsub float -0.0, %mul 2174 %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0) 2175 %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0) 2176 store volatile float %intrp0, float addrspace(1)* %out.gep 2177 store volatile float %intrp1, float addrspace(1)* %out.gep 2178 ret void 2179} 2180 2181; GCN-LABEL: {{^}}v_fneg_interp_p2_f32: 2182; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2183; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2184; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]] 2185; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]] 2186; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]] 2187define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 2188 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2189 %tid.ext = sext i32 %tid to i64 2190 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2191 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2192 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2193 %a = load volatile float, float addrspace(1)* %a.gep 2194 %b = load volatile float, float addrspace(1)* %b.gep 2195 %mul = fmul float %a, %b 2196 %fneg = fsub float -0.0, %mul 2197 %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0) 2198 %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0) 2199 store volatile float %intrp0, float addrspace(1)* %out.gep 2200 store volatile float %intrp1, float addrspace(1)* %out.gep 2201 ret void 2202} 2203 2204; -------------------------------------------------------------------------------- 2205; CopyToReg tests 2206; -------------------------------------------------------------------------------- 2207 2208; GCN-LABEL: {{^}}v_fneg_copytoreg_f32: 2209; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2210; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2211; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2212; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]] 2213; GCN: s_cbranch_scc0 2214 2215; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]] 2216; GCN: s_endpgm 2217 2218; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]] 2219; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]] 2220; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2221 2222define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 { 2223 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2224 %tid.ext = sext i32 %tid to i64 2225 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2226 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2227 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2228 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2229 %a = load volatile float, float addrspace(1)* %a.gep 2230 %b = load volatile float, float addrspace(1)* %b.gep 2231 %c = load volatile float, float addrspace(1)* %c.gep 2232 %mul = fmul float %a, %b 2233 %fneg = fsub float -0.0, %mul 2234 %cmp0 = icmp eq i32 %d, 0 2235 br i1 %cmp0, label %if, label %endif 2236 2237if: 2238 %mul1 = fmul float %fneg, %c 2239 store volatile float %mul1, float addrspace(1)* %out.gep 2240 br label %endif 2241 2242endif: 2243 store volatile float %mul, float addrspace(1)* %out.gep 2244 ret void 2245} 2246 2247; -------------------------------------------------------------------------------- 2248; inlineasm tests 2249; -------------------------------------------------------------------------------- 2250 2251; Can't fold into use, so should fold into source 2252; GCN-LABEL: {{^}}v_fneg_inlineasm_f32: 2253; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2254; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2255; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]] 2256; GCN: ; use [[MUL]] 2257; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 2258define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 { 2259 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2260 %tid.ext = sext i32 %tid to i64 2261 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2262 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2263 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2264 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2265 %a = load volatile float, float addrspace(1)* %a.gep 2266 %b = load volatile float, float addrspace(1)* %b.gep 2267 %c = load volatile float, float addrspace(1)* %c.gep 2268 %mul = fmul float %a, %b 2269 %fneg = fsub float -0.0, %mul 2270 call void asm sideeffect "; use $0", "v"(float %fneg) #0 2271 store volatile float %fneg, float addrspace(1)* %out.gep 2272 ret void 2273} 2274 2275; -------------------------------------------------------------------------------- 2276; inlineasm tests 2277; -------------------------------------------------------------------------------- 2278 2279; Can't fold into use, so should fold into source 2280; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32: 2281; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2282; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2283; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]] 2284; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]] 2285; GCN: ; use [[NEG]] 2286; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] 2287define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 { 2288 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2289 %tid.ext = sext i32 %tid to i64 2290 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2291 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2292 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2293 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2294 %a = load volatile float, float addrspace(1)* %a.gep 2295 %b = load volatile float, float addrspace(1)* %b.gep 2296 %c = load volatile float, float addrspace(1)* %c.gep 2297 %mul = fmul float %a, %b 2298 %fneg = fsub float -0.0, %mul 2299 call void asm sideeffect "; use $0", "v"(float %fneg) #0 2300 store volatile float %mul, float addrspace(1)* %out.gep 2301 ret void 2302} 2303 2304; -------------------------------------------------------------------------------- 2305; code size regression tests 2306; -------------------------------------------------------------------------------- 2307 2308; There are multiple users of the fneg that must use a VOP3 2309; instruction, so there is no penalty 2310; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32: 2311; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2312; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2313; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2314 2315; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]] 2316; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0 2317 2318; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]] 2319; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]] 2320define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 2321 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2322 %tid.ext = sext i32 %tid to i64 2323 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2324 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2325 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2326 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2327 %a = load volatile float, float addrspace(1)* %a.gep 2328 %b = load volatile float, float addrspace(1)* %b.gep 2329 %c = load volatile float, float addrspace(1)* %c.gep 2330 2331 %fneg.a = fsub float -0.0, %a 2332 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 2333 %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0) 2334 2335 store volatile float %fma0, float addrspace(1)* %out 2336 store volatile float %fma1, float addrspace(1)* %out 2337 ret void 2338} 2339 2340; There are multiple users, but both require using a larger encoding 2341; for the modifier. 2342 2343; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32: 2344; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2345; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2346; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2347 2348; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]] 2349; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]] 2350; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]] 2351; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2352define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 2353 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2354 %tid.ext = sext i32 %tid to i64 2355 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2356 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2357 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2358 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2359 %a = load volatile float, float addrspace(1)* %a.gep 2360 %b = load volatile float, float addrspace(1)* %b.gep 2361 %c = load volatile float, float addrspace(1)* %c.gep 2362 2363 %fneg.a = fsub float -0.0, %a 2364 %mul0 = fmul float %fneg.a, %b 2365 %mul1 = fmul float %fneg.a, %c 2366 2367 store volatile float %mul0, float addrspace(1)* %out 2368 store volatile float %mul1, float addrspace(1)* %out 2369 ret void 2370} 2371 2372; One user is VOP3 so has no cost to folding the modifier, the other does. 2373; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32: 2374; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2375; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2376; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2377 2378; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0 2379; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]] 2380 2381; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]] 2382; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2383define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 2384 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2385 %tid.ext = sext i32 %tid to i64 2386 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2387 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2388 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2389 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2390 %a = load volatile float, float addrspace(1)* %a.gep 2391 %b = load volatile float, float addrspace(1)* %b.gep 2392 %c = load volatile float, float addrspace(1)* %c.gep 2393 2394 %fneg.a = fsub float -0.0, %a 2395 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0) 2396 %mul1 = fmul float %fneg.a, %c 2397 2398 store volatile float %fma0, float addrspace(1)* %out 2399 store volatile float %mul1, float addrspace(1)* %out 2400 ret void 2401} 2402 2403; The use of the fneg requires a code size increase, but folding into 2404; the source does not 2405 2406; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32: 2407; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2408; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2409; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2410; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]] 2411 2412; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0 2413; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]] 2414; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]] 2415 2416; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0 2417; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]] 2418; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]] 2419 2420; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2421; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]] 2422define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 { 2423 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2424 %tid.ext = sext i32 %tid to i64 2425 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2426 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2427 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2428 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext 2429 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2430 %a = load volatile float, float addrspace(1)* %a.gep 2431 %b = load volatile float, float addrspace(1)* %b.gep 2432 %c = load volatile float, float addrspace(1)* %c.gep 2433 %d = load volatile float, float addrspace(1)* %d.gep 2434 2435 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0) 2436 %fneg.fma0 = fsub float -0.0, %fma0 2437 %mul1 = fmul float %fneg.fma0, %c 2438 %mul2 = fmul float %fneg.fma0, %d 2439 2440 store volatile float %mul1, float addrspace(1)* %out 2441 store volatile float %mul2, float addrspace(1)* %out 2442 ret void 2443} 2444 2445; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64: 2446; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 2447; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]] 2448; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]] 2449; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]] 2450 2451; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0 2452; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]] 2453; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]] 2454 2455; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]] 2456; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2457define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 { 2458 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2459 %tid.ext = sext i32 %tid to i64 2460 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 2461 %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext 2462 %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext 2463 %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext 2464 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 2465 %a = load volatile double, double addrspace(1)* %a.gep 2466 %b = load volatile double, double addrspace(1)* %b.gep 2467 %c = load volatile double, double addrspace(1)* %c.gep 2468 %d = load volatile double, double addrspace(1)* %d.gep 2469 2470 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0) 2471 %fneg.fma0 = fsub double -0.0, %fma0 2472 %mul1 = fmul double %fneg.fma0, %c 2473 %mul2 = fmul double %fneg.fma0, %d 2474 2475 store volatile double %mul1, double addrspace(1)* %out 2476 store volatile double %mul2, double addrspace(1)* %out 2477 ret void 2478} 2479 2480; %trunc.a has one fneg use, but it requires a code size increase and 2481; %the fneg can instead be folded for free into the fma. 2482 2483; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32: 2484; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2485; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2486; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2487; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]] 2488; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]] 2489; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]] 2490define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 { 2491 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2492 %tid.ext = sext i32 %tid to i64 2493 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2494 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2495 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2496 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext 2497 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2498 %a = load volatile float, float addrspace(1)* %a.gep 2499 %b = load volatile float, float addrspace(1)* %b.gep 2500 %c = load volatile float, float addrspace(1)* %c.gep 2501 %d = load volatile float, float addrspace(1)* %d.gep 2502 2503 %trunc.a = call float @llvm.trunc.f32(float %a) 2504 %trunc.fneg.a = fsub float -0.0, %trunc.a 2505 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) 2506 store volatile float %fma0, float addrspace(1)* %out 2507 ret void 2508} 2509 2510; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src: 2511; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 2512; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 2513; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 2514; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]] 2515; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]] 2516; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]] 2517; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]] 2518; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]] 2519; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]] 2520define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 { 2521 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2522 %tid.ext = sext i32 %tid to i64 2523 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 2524 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 2525 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 2526 %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext 2527 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 2528 %a = load volatile float, float addrspace(1)* %a.gep 2529 %b = load volatile float, float addrspace(1)* %b.gep 2530 %c = load volatile float, float addrspace(1)* %c.gep 2531 %d = load volatile float, float addrspace(1)* %d.gep 2532 2533 %trunc.a = call float @llvm.trunc.f32(float %a) 2534 %trunc.fneg.a = fsub float -0.0, %trunc.a 2535 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) 2536 %mul1 = fmul float %trunc.a, %d 2537 store volatile float %fma0, float addrspace(1)* %out 2538 store volatile float %mul1, float addrspace(1)* %out 2539 ret void 2540} 2541 2542declare i32 @llvm.amdgcn.workitem.id.x() #1 2543declare float @llvm.fma.f32(float, float, float) #1 2544declare float @llvm.fmuladd.f32(float, float, float) #1 2545declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 2546declare float @llvm.sin.f32(float) #1 2547declare float @llvm.trunc.f32(float) #1 2548declare float @llvm.round.f32(float) #1 2549declare float @llvm.rint.f32(float) #1 2550declare float @llvm.nearbyint.f32(float) #1 2551declare float @llvm.canonicalize.f32(float) #1 2552declare float @llvm.minnum.f32(float, float) #1 2553declare float @llvm.maxnum.f32(float, float) #1 2554declare half @llvm.minnum.f16(half, half) #1 2555declare double @llvm.minnum.f64(double, double) #1 2556declare double @llvm.fma.f64(double, double, double) #1 2557 2558declare float @llvm.amdgcn.sin.f32(float) #1 2559declare float @llvm.amdgcn.rcp.f32(float) #1 2560declare float @llvm.amdgcn.rcp.legacy(float) #1 2561declare float @llvm.amdgcn.fmul.legacy(float, float) #1 2562declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0 2563declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 2564 2565attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 2566attributes #1 = { nounwind readnone } 2567attributes #2 = { nounwind "unsafe-fp-math"="true" } 2568