; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

; Instruction selection tests for integer absolute value and signed min/max.
;
; The abs tests spell the operation as
;   select (icmp sgt %val, (sub 0, %val)), %val, (sub 0, %val)
; and the min/max tests share one icmp sgt between two selects with swapped
; operands. Functions prefixed s_ take their operands as kernel arguments;
; functions prefixed v_ load them from addrspace(1) memory.
;
; Every llc invocation above feeds FileCheck with -enable-var-scope, so a
; pattern variable captured in one function is not visible in the next one.

; Scalar abs of a kernel argument followed by an add of 2.
; The amdgcn runs expect s_abs_i32 then s_add_i32; the r600 run expects MAX_INT.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32

; EG: MAX_INT
define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; abs of a loaded value followed by an add of 2.
; The amdgcn runs expect a subtract from 0 feeding v_max_i32 together with the
; source register, then an add.
; FUNC-LABEL: {{^}}v_abs_i32:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2

; EG: MAX_INT
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %val = load i32, i32 addrspace(1)* %src, align 4
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; abs of a loaded value whose result is used twice by one multiply.
; The amdgcn runs expect a single v_max_i32 whose result register is both
; operands of v_mul_lo_i32. No instruction checks exist for the r600 run here;
; only the function label is matched for it.
; FUNC-LABEL: {{^}}v_abs_i32_repeat_user:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  %val = load i32, i32 addrspace(1)* %src, align 4
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  %mul = mul i32 %res, %res
  store i32 %mul, i32 addrspace(1)* %out, align 4
  ret void
}

; Two-element vector abs of a kernel argument followed by an add of <2, 2>.
; One s_abs_i32 / s_add_i32 (amdgcn) or MAX_INT (r600) is expected per element.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  ; %z1 is the all-zero vector and %t1 the all-two vector, built element by element.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Two-element vector abs of a loaded value followed by an add of <2, 2>.
; The subtract and max checks may match in any order; each max must pair a
; source register with its own negation.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  ; %z1 is the all-zero vector and %t1 the all-two vector, built element by element.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Four-element vector abs of a kernel argument followed by an add of <2, 2, 2, 2>.
; One s_abs_i32 and one s_add_i32 are expected per element on the amdgcn runs.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  ; %z3 is the all-zero vector and %t3 the all-two vector, built element by element.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Four-element vector abs of a loaded value followed by an add of <2, 2, 2, 2>.
; The subtract and max checks may match in any order; each max must pair a
; source register with its own negation.
; FUNC-LABEL: {{^}}v_abs_v4i32:

; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  ; %z3 is the all-zero vector and %t3 the all-two vector, built element by element.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; One signed compare feeding two selects with swapped operands, on kernel
; arguments. The amdgcn runs expect one s_min_i32 and one s_max_i32 on the two
; loaded argument registers. The [8 x i32] parameters sit between the other
; kernel arguments and are otherwise unused.
; FUNC-LABEL: {{^}}s_min_max_i32:
; GCN: s_load_dword [[VAL0:s[0-9]+]]
; GCN: s_load_dword [[VAL1:s[0-9]+]]

; GCN-DAG: s_min_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: s_max_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32], i32 %val0, [8 x i32], i32 %val1) nounwind {
  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; Same min/max pair as above, on two volatile-loaded values.
; The amdgcn runs expect v_min_i32 and v_max_i32 on the two loaded registers.
; FUNC-LABEL: {{^}}v_min_max_i32:
; GCN: {{buffer|flat|global}}_load_dword [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[VAL1:v[0-9]+]]

; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; Four-element vector min/max pair on kernel arguments.
; The amdgcn runs expect four s_min_i32 and four s_max_i32, in any order.
; FUNC-LABEL: {{^}}s_min_max_v4i32:
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
define amdgpu_kernel void @s_min_max_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %val0, <4 x i32> %val1) nounwind {
  %cond0 = icmp sgt <4 x i32> %val0, %val1
  %sel0 = select <4 x i1> %cond0, <4 x i32> %val0, <4 x i32> %val1
  %sel1 = select <4 x i1> %cond0, <4 x i32> %val1, <4 x i32> %val0

  store volatile <4 x i32> %sel0, <4 x i32> addrspace(1)* %out0, align 4
  store volatile <4 x i32> %sel1, <4 x i32> addrspace(1)* %out1, align 4
  ret void
}

; Min/max pair where the compare result itself is also stored.
; With that extra use of %cond0 the amdgcn runs expect an explicit
; v_cmp_gt_i32 and three v_cndmask_b32 (two selects plus the i1 materialised
; as 0/1) rather than min/max instructions.
; FUNC-LABEL: {{^}}v_min_max_i32_user:
; GCN: v_cmp_gt_i32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ; Extra user of the compare result.
  store volatile i1 %cond0, i1 addrspace(1)* undef
  ret void
}