; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX678 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX678 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Code generation tests for the llvm.canonicalize intrinsic on AMDGPU.
;
; The file is compiled three times (hawaii, fiji, gfx900) and covers:
;   * variable operands, including fabs/fneg folded as source modifiers;
;   * constant operands (zeros, ordinary values, denormals, quiet and
;     signaling NaNs), which are expected to fold to an immediate;
;   * the effect of the different denormal-fp-math attribute groups;
;   * vector types, which are expected to be handled element by element.
;
; Check prefixes, as wired up by the run lines above:
;   GCN     - all three runs
;   GFX6    - the hawaii run only
;   GFX8    - the fiji run only
;   GFX678  - the hawaii and fiji runs
;   GFX9    - the gfx900 run only

declare float @llvm.fabs.f32(float) #0
declare float @llvm.canonicalize.f32(float) #0
declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0
declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0
declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0
declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.canonicalize.f64(double) #0
declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0
declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0

; ---------------------------------------------------------------------------
; f32, variable operand. The hawaii/fiji runs expect a multiply by +/-1.0,
; the gfx900 run expects a max of the value with itself. fabs and fneg are
; expected to appear as source modifiers on that single instruction.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %canonicalized = call float @llvm.canonicalize.f32(float %val)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Same as above, but the operand is a kernel argument, so the checks expect
; scalar registers as the instruction sources.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float %val)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %val.fabs = call float @llvm.fabs.f32(float %val)
  %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %val.fabs = call float @llvm.fabs.f32(float %val)
  %val.fabs.fneg = fneg float %val.fabs
  %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
  %val = load float, float addrspace(1)* %out
  %val.fneg = fneg float %val
  %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; f32, constant operand. Each test expects the call to fold away so that only
; a move of the resulting immediate feeds the store.
; ---------------------------------------------------------------------------

; undef is expected to fold to the quiet NaN pattern 0x7fc00000.
; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float undef)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; -0.0 is expected to be materialized with a bit-reverse of 1.
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; Denormal constants: 8388607 is 0x007fffff and 2155872255 is 0x807fffff.
; With attribute group #1 the expected result is 0; with #3 the expected
; result is the unchanged bit pattern.

; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; NaN constants: every NaN input below, quiet or signaling and of either
; sign, is expected to fold to the same quiet NaN pattern 0x7fc00000.

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
  store float %canonicalized, float addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; f64, variable operand. All three runs expect a max of the value with itself,
; with fabs/fneg appearing as source modifiers.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %canonicalized = call double @llvm.canonicalize.f64(double %val)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double %val)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fabs = call double @llvm.fabs.f64(double %val)
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; The destination pattern accepts a multi-digit upper register index, like
; the sibling f64 tests do.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fabs = call double @llvm.fabs.f64(double %val)
  %val.fabs.fneg = fneg double %val.fabs
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
  %val = load double, double addrspace(1)* %out
  %val.fneg = fneg double %val
  %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; f64, constant operand. The folded value is expected as two 32-bit moves
; (LO and HI halves) feeding a 64-bit store.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; Denormal f64 constants. With attribute group #2 the expected result is 0;
; with #3 the expected result is the unchanged bit pattern.

; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; NaN f64 constants: every NaN input below is expected to fold to the quiet
; NaN with high half 0x7ff80000 and low half 0.

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
  store double %canonicalized, double addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Per-lane loaded values with denormals flushed (attribute group #4).
; ---------------------------------------------------------------------------

; The gfx900 check below uses the GFX9 prefix so that it is actually
; evaluated; a prefix that no run line defines is silently ignored.
; GCN-LABEL: {{^}}test_canonicalize_value_f64_flush:
; GFX678: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
; GFX9: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: {{^}}test_canonicalize_value_f32_flush:
; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: {{^}}test_canonicalize_value_f16_flush:
; GFX8: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_flush:
; GFX8: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
; GFX8-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}

; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
  %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
  ret void
}

; ---------------------------------------------------------------------------
; Per-lane loaded values with IEEE denormals (attribute group #3).
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_canonicalize_value_f64_denorm:
; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #3 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: {{^}}test_canonicalize_value_f32_denorm:
; GFX678: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #3 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; FIXME: Conversion to float should count as the canonicalize pre-gfx8
; GCN-LABEL: {{^}}test_canonicalize_value_f16_denorm:
; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX8: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #3 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; GCN-LABEL: {{^}}test_canonicalize_value_v2f16_denorm:
; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}

; GFX8: v_max_f16_sdwa
; GFX8: v_max_f16_e32

; GFX9: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #3 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
  %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
  ret void
}

; ---------------------------------------------------------------------------
; Vector types. The checks expect one scalar canonicalize instruction per
; vector element.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f64:
; GCN: v_max_f64
; GCN: v_max_f64
define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x double>, <2 x double> addrspace(1)* %out, i32 %tid
  %val = load <2 x double>, <2 x double> addrspace(1)* %gep
  %canonicalized = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %val)
  store <2 x double> %canonicalized, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_v2f32_flush:
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}

; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
  %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg)
  ret <2 x float> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v3f32_flush:
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}

; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
  %canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg)
  ret <3 x float> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v4f32_flush:
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}

; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
  %canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg)
  ret <4 x float> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v8f32_flush:
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
; GFX6: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}

; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
  %canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg)
  ret <8 x float> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v2f64:
; GCN: v_max_f64
; GCN: v_max_f64
define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 {
  %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %arg)
  ret <2 x double> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v3f64:
; GCN: v_max_f64
; GCN: v_max_f64
; GCN: v_max_f64
define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 {
  %canon = call <3 x double> @llvm.canonicalize.v3f64(<3 x double> %arg)
  ret <3 x double> %canon
}

; GCN-LABEL: {{^}}v_test_canonicalize_v4f64:
; GCN: v_max_f64
; GCN: v_max_f64
; GCN: v_max_f64
; GCN: v_max_f64
define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
  %canon = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %arg)
  ret <4 x double> %canon
}

; Attribute groups used above:
;   #0 - intrinsic declarations.
;   #1 - sets only the f32-specific denormal mode, to preserve-sign.
;   #2 - sets the general denormal mode to preserve-sign.
;   #3 - sets the general denormal mode to ieee.
;   #4 - same attribute string as #2; kept as a separate group because the
;        *_flush tests reference it by number.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }