; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Tests instruction selection of the sum-of-absolute-differences patterns into
; v_sad_u32. Two IR shapes are covered: pat1 (max(a,b) - min(a,b) + c) and
; pat2 (select(a>b, a-b, b-a) + c), over i32, <4 x i32>, i16 and i8, plus
; multi-use and mismatched-operand variants where the CHECK lines pin down
; whether the fold is still (or no longer) performed.

; GCN-LABEL: {{^}}v_sad_u32_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_constant_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20
define amdgpu_kernel void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
  %icmp0 = icmp ugt i32 %a, 90
  %t0 = select i1 %icmp0, i32 %a, i32 90

  %icmp1 = icmp ule i32 %a, 90
  %t1 = select i1 %icmp1, i32 %a, i32 90

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, 20

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; An extra use of the sub blocks the fold: scalar max/min/sub/add are emitted.
; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  store volatile i32 %ret0, i32 addrspace(5)* undef
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c
  store volatile i32 %ret, i32 addrspace(5)* undef
  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b
  store volatile i32 %t0, i32 addrspace(5)* undef

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %b

  store volatile i32 %t1, i32 addrspace(5)* undef

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  store volatile i32 %sub0, i32 addrspace(5)* undef
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %b
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
  store volatile i32 %ret0, i32 addrspace(5)* undef

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %icmp0 = icmp ugt <4 x i32> %a, %b
  %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b

  %icmp1 = icmp ule <4 x i32> %a, %b
  %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b

  %ret0 = sub <4 x i32> %t0, %t1
  %ret = add <4 x i32> %ret0, %c

  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  %icmp0 = icmp ugt <4 x i32> %a, %b
  %sub0 = sub <4 x i32> %a, %b
  %sub1 = sub <4 x i32> %b, %a
  %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1

  %ret = add <4 x i32> %ret0, %c

  store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {

  %icmp0 = icmp ugt i16 %a, %b
  %t0 = select i1 %icmp0, i16 %a, i16 %b

  %icmp1 = icmp ule i16 %a, %b
  %t1 = select i1 %icmp1, i16 %a, i16 %b

  %ret0 = sub i16 %t0, %t1
  %ret = add i16 %ret0, %c

  store i16 %ret, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out) {
  %a = load volatile i16, i16 addrspace(1)* undef
  %b = load volatile i16, i16 addrspace(1)* undef
  %c = load volatile i16, i16 addrspace(1)* undef
  %icmp0 = icmp ugt i16 %a, %b
  %sub0 = sub i16 %a, %b
  %sub1 = sub i16 %b, %a
  %ret0 = select i1 %icmp0, i16 %sub0, i16 %sub1

  %ret = add i16 %ret0, %c

  store i16 %ret, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
  %icmp0 = icmp ugt i8 %a, %b
  %t0 = select i1 %icmp0, i8 %a, i8 %b

  %icmp1 = icmp ule i8 %a, %b
  %t1 = select i1 %icmp1, i8 %a, i8 %b

  %ret0 = sub i8 %t0, %t1
  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) {
  %a = load volatile i8, i8 addrspace(1)* undef
  %b = load volatile i8, i8 addrspace(1)* undef
  %c = load volatile i8, i8 addrspace(1)* undef
  %icmp0 = icmp ugt i8 %a, %b
  %sub0 = sub i8 %a, %b
  %sub1 = sub i8 %b, %a
  %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1

  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; Scalar i8 inputs take the SALU path; no v_sad_u32 is formed here.
; GCN-LABEL: {{^}}s_sad_u32_i8_pat2:
; GCN: s_load_dword
; GCN: s_bfe_u32
; GCN: s_sub_i32
; GCN: s_and_b32
; GCN: s_sub_i32
; GCN: s_lshr_b32
; GCN: v_add_i32_e32
define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
  %icmp0 = icmp ugt i8 %a, %b
  %sub0 = sub i8 %a, %b
  %sub1 = sub i8 %b, %a
  %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1

  %ret = add i8 %ret0, %c

  store i8 %ret, i8 addrspace(1)* %out
  ret void
}

; min operand %d does not match max operand %b, so no SAD fold is possible.
; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1:
; GCN: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
  %icmp0 = icmp ugt i32 %a, %b
  %t0 = select i1 %icmp0, i32 %a, i32 %b

  %icmp1 = icmp ule i32 %a, %b
  %t1 = select i1 %icmp1, i32 %a, i32 %d

  %ret0 = sub i32 %t0, %t1
  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}

; sub operands (%a-%d vs %b-%a) are not mirror images, so no SAD fold.
; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2:
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
  %icmp0 = icmp ugt i32 %a, %b
  %sub0 = sub i32 %a, %d
  %sub1 = sub i32 %b, %a
  %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

  %ret = add i32 %ret0, %c

  store i32 %ret, i32 addrspace(1)* %out
  ret void
}