; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Codegen tests for selects whose operands are fabs and/or fneg results (or
; floating-point constants) and whose result feeds an fadd or fmul. Both
; invocations above enable no-signed-zeros FP math; the first targets tahiti
; (extra prefix SI) and the second targets fiji (extra prefix VI), and the
; GCN prefix is shared by both.
;
; Every kernel takes only the i32 condition input %c, reads its float inputs
; with volatile loads from an undef global pointer, and writes its results
; with volatile stores, so the loads and stores appear in program order in
; the checked output.

; --------------------------------------------------------------------------------
; fabs operands
; --------------------------------------------------------------------------------

; fabs on both select inputs: the checks expect a select of the raw values
; and a single |x| source modifier on the add.
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; As above, but fabs(x) has a second use in another fadd.
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; As above, but the second use of fabs(x) is a store, so the checks expect
; the absolute value to be materialized with a v_and.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %fabs.x, float addrspace(1)* undef
  ret void
}

; Multi-use variant where the extra fadd uses fabs(y) instead of fabs(x).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; Only one select input is a fabs; the checks expect the modifier to stay on
; the select operand and the add to use the select result unmodified.
; GCN-LABEL: {{^}}add_select_fabs_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fabs versus a negative constant: the checks expect the fabs to stay on the
; select operand.
; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; FIXME: fabs should fold away
; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fabs = call float @llvm.fabs.f32(float %select)
  %add = fadd float %fabs, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Baseline without fabs or fneg: a select between two positive constants.
; GCN-LABEL: {{^}}add_select_posk_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float 2.0, float 1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Commuted form of add_select_fabs_negk_f32; the checks expect the compare to
; be inverted to v_cmp_ne.
; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Same with -1024.0, which the checks expect to be materialized in a VGPR
; (0xc4800000) rather than used as an immediate operand.
; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000

; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1024.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fabs versus a positive constant: the checks expect the fabs to move off the
; select and onto the add as a source modifier.
; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef

  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Commuted form of add_select_fabs_posk_f32.
; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float 1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fneg operands (written as fsub -0.0, x)
; --------------------------------------------------------------------------------

; fneg on both select inputs: the checks expect a select of the raw values
; and the add to become a subtract of the select result.
; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; As above, but fneg(x) has a second use in another fadd.
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; As above, but the second use of fneg(x) is a store, so the checks expect
; the negation to be materialized with a v_xor of the sign bit.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %fneg.x, float addrspace(1)* undef
  ret void
}

; Multi-use variant where the extra fadd uses fneg(y) instead of fneg(x).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; Only one select input is an fneg; the checks expect the modifier to stay on
; the select operand.
; GCN-LABEL: {{^}}add_select_fneg_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg versus a constant: the checks expect the constant to be negated in the
; select and the add to become a subtract.
; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; The constant is 0x3FC45F3060000000; the checks expect its negated single
; precision encoding (0xbe22f983) to be materialized in a VGPR on both targets.
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Negative counterpart of the previous test. After negation the checks expect
; the tahiti run to materialize 0x3e22f983 in a VGPR, while the fiji run uses
; 0.15915494 directly as a select operand.
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983

; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc

; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Baseline without fabs or fneg: a select between two negative constants.
; GCN-LABEL: {{^}}add_select_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Same with -2048.0 and -4096.0, which the checks expect to be materialized
; in VGPRs.
; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2048.0, float -4096.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg applied to a select of two negative constants: the checks expect the
; negation to fold into the constants (1.0 / 2.0) and a plain add.
; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fneg.x = fsub float -0.0, %select
  %add = fadd float %fneg.x, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Commuted form of add_select_fneg_negk_f32.
; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float -1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg versus a positive constant: the checks expect the constant to become
; -1.0 in the select and the add to become a subtract.
; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Commuted form of add_select_fneg_posk_f32.
; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float 1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; Mixed fabs / fneg / fneg(fabs) operands
; --------------------------------------------------------------------------------

; Differing modifiers on the two inputs: the checks expect each modifier to
; stay on its own select operand.
; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fabs.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Both inputs are negated: the checks expect the common fneg to be pulled out
; (the add becomes a subtract) while the fabs stays on its select operand.
; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fneg(fabs) versus a constant, feeding an fmul
; --------------------------------------------------------------------------------

; Positive constant: the checks expect the constant to be negated, the fabs
; to stay on the select operand, and the fneg to move to the multiply.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_eq_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; Commuted form of mul_select_negfabs_posk_f32.
; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_ne_u32_e64 [[VCC:.*]], s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, |[[X]]|, [[VCC]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; Negative constant: the checks expect a select of the raw value against 4.0
; and both modifiers applied on the multiply.
; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; Commuted form of mul_select_negfabs_negk_f32.
; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; Don't fold if fneg can fold into the source
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %add = fadd float %x, 4.0
  %fneg = fsub float -0.0, %add
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %add = fsub float %x, 4.0
  %fneg = fsub float -0.0, %add
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %mul = fmul float %x, 4.0
  %fneg = fsub float -0.0, %mul
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fma
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; NOTE(review): unlike the fma test above, no multiply-add instruction is
; checked here, and the select check reuses [[X]] (the first loaded register)
; where the negated fmuladd result is expected. Presumably this only matches
; when the multiply-add result is allocated to the same register as x;
; confirm against llc output and consider capturing the result explicitly.
; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fmad
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; FIXME: This one should fold to rcp
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
  %fneg = fsub float -0.0, %rcp
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; Intrinsic declarations. The two "legacy" intrinsics are not called by any
; kernel above.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1

; #0 is attached to every kernel, #1 to every intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }