; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

; Operand-placement tests for vector ALU instructions whose sources repeat the
; same scalar kernel argument. Each kernel loads its float/i32 arguments with
; s_load_dword, feeds them to a two- or three-source operation, and stores the
; result with buffer_store_dword. The directives above each kernel pin which
; sources are expected to stay in SGPRs, which are expected to be copied to a
; VGPR first, and where an inline immediate is expected to land.
;
; NOTE(review): the copies to a VGPR are presumably forced by the limit on how
; many distinct SGPRs one VALU instruction may read -- confirm against the ISA
; documentation before relying on this explanation.

declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1


; a + a: one scalar is loaded and is expected to be used directly as both
; sources of the e64-encoded add.
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  %sum = fadd float %a, %a
  store float %sum, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, a): the same loaded scalar is expected in all three source slots.
; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  %res = call float @llvm.fma.f32(float %a, float %a, float %a) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, b): two different scalars are loaded. The one used once (b) is
; expected to be moved into a VGPR, while the repeated one (a) stays in its
; SGPR for the first two sources. The tonga load offsets (0x2c, 0x30) are four
; times the SI ones (0xb, 0xc) -- presumably byte versus dword units.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  %res = call float @llvm.fma.f32(float %a, float %a, float %b) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, b, a): same two loads as the a_a_b case. The expected instruction has
; the VGPR copy of b as the first source and the SGPR holding a as the second
; and third sources, i.e. the two multiplicands appear swapped relative to the
; IR call.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %res = call float @llvm.fma.f32(float %a, float %b, float %a) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; fma(b, a, a): same two loads again. The expected instruction has the SGPR
; holding a as the first and third sources and the VGPR copy of b as the
; second source.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %res = call float @llvm.fma.f32(float %b, float %a, float %a) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, 2.0): the constant is expected to appear as an inline immediate in
; the third source, next to the twice-used SGPR.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  %res = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, 2.0, a): the constant is expected as an inline immediate in the first
; source, with the SGPR used for the remaining two.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  %res = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  store float %res, float addrspace(1)* %out, align 4
  ret void
}

; The immediate-first case is exercised with the imad24 intrinsic instead of
; fma, because fma c, x, y is canonicalized to fma x, c, y and so would not
; keep the constant in the leading operand.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
  %mad = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
  store i32 %mad, i32 addrspace(1)* %out, align 4
  ret void
}

; #0 is applied to the kernels; #1 to the intrinsic declarations and calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }