; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s

; Every function below spells integer absolute value as
;   select (icmp sgt %val, (sub 0, %val)), %val, (sub 0, %val)
; then adds 2 to the result and stores it to %out.
;
; The s_* functions receive the value directly as a function argument and the
; checks expect the s_abs_i32 instruction. The v_* functions load the value
; from %src and the checks expect a v_sub_i32 from zero followed by a
; v_max_i32 of the negated and original values.

; Scalar i32 passed as an argument: one s_abs_i32, then the add.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32
define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 loaded from %src: negate with v_sub_i32, then take the max of the
; negated and original values.
; FUNC-LABEL: {{^}}v_abs_i32:
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
; GCN: v_add_i32
define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %val = load i32, i32 addrspace(1)* %src, align 4
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> passed as an argument: one s_abs_i32 and one s_add_i32 per element.
; %z1 is the all-zero vector and %t1 is the all-twos vector, both built with
; insertelement.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> loaded from %src: a v_sub_i32 / v_max_i32 pair per element. Each
; max must use the registers captured from its matching sub.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]

; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> passed as an argument: four s_abs_i32 followed by four s_add_i32.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x i32> loaded from %src: four v_sub_i32 / v_max_i32 pairs, then four
; v_add_i32. Each max must use the registers captured from its matching sub.
; FUNC-LABEL: {{^}}v_abs_v4i32:
; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]

; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
; GCN: v_add_i32
define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}