; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; Code generation test for the llvm.canonicalize intrinsic on half,
; <2 x half>, <3 x half> and <4 x half> operands.
;
; The file is compiled three times by the command lines above:
;   -mcpu=tonga   checked with the GCN, VI and GFX89 prefixes
;   -mcpu=gfx900  checked with the GCN, GFX9 and GFX89 prefixes
;   -mcpu=kaveri  checked with the GCN and CI prefixes
; Many tests below carry no kaveri-specific checks; for those only the
; function label is verified on that run.

declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0

; -----------------------------------------------------------------------------
; Scalar half: undef and variable operands
; -----------------------------------------------------------------------------

; canonicalize(undef): tonga and gfx900 are expected to store the constant
; 0x7e00 instead of emitting a canonicalizing instruction.
; GCN-LABEL: {{^}}test_fold_canonicalize_undef_value_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half undef)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a half loaded from %out. tonga/gfx900 expect a v_max_f16;
; kaveri expects a conversion to f32 followed by a multiply by 1.0.
; Note that the result is stored through an undef pointer, not through %out.
; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
; GFX89: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]

; CI: v_cvt_f32_f16_e32
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* undef
  ret void
}

; Same operation with the operand supplied as a kernel argument (an i16
; bitcast to half); the expected v_max_f16_e64 reads s-registers directly.
; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
  %val = bitcast i16 %val.arg to half
  %canonicalized = call half @llvm.canonicalize.f16(half %val)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Canonicalize a <2 x half> assembled from two half arguments. gfx900 is
; expected to pack first and use a single v_pk_max_f16; tonga is expected to
; canonicalize each half separately and then combine them with v_or_b32.
; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_v2f16:
; GFX9: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0

; VI: v_max_f16_sdwa v1, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_max_f16_e32 v0, v0, v0
; VI: v_or_b32_e32 v0, v0, v1
define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
  %ins0 = insertelement <2 x half> undef, half %lo, i32 0
  %ins1 = insertelement <2 x half> %ins0, half %hi, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
  ret <2 x half> %canonicalized
}

; -----------------------------------------------------------------------------
; Scalar half: fabs / fneg feeding the canonicalize
; The checks expect the fabs/fneg to appear as |x| / -x operand modifiers on
; the canonicalizing instruction rather than as separate instructions.
; -----------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; On kaveri the -|x| modifiers are expected on the f16 -> f32 conversion.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]

; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %val.fabs.fneg = fneg half %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
; GFX89: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]

; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -{{v[0-9]+}}
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
  %val = load half, half addrspace(1)* %out
  %val.fneg = fneg half %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; The next two tests use attribute group #2 ("denormal-fp-math" set to
; preserve-sign). With it, tonga is expected to canonicalize with a
; v_mul_f16 by -1.0 (the fneg folded into the constant), while gfx900 is
; still expected to use v_max_f16 with operand modifiers.
; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_var_f16:
; VI: v_mul_f16_e32 [[REG:v[0-9]+]], -1.0, v{{[0-9]+}}
; GFX9: v_max_f16_e64 [[REG:v[0-9]+]], -v{{[0-9]+}}, -v{{[0-9]+}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
  %val = load half, half addrspace(1)* %out
  %val.fneg = fneg half %val
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
; VI: v_mul_f16_e64 [[REG:v[0-9]+]], -1.0, |v{{[0-9]+}}|
; GFX9: v_max_f16_e64 [[REG:v[0-9]+]], -|v{{[0-9]+}}|, -|v{{[0-9]+}}|

; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]

; CI: v_cvt_f32_f16_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
  %val = load half, half addrspace(1)* %out
  %val.fabs = call half @llvm.fabs.f16(half %val)
  %val.fabs.fneg = fneg half %val.fabs
  %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; -----------------------------------------------------------------------------
; Scalar half: constant operands
; Each test expects the canonicalized constant to be materialized directly
; with v_mov_b32 and stored; only the tonga and gfx900 runs check the value.
; Constants with the sign bit set are expected sign-extended to 32 bits in
; the v_mov_b32 immediate (e.g. 0xffff8000 for -0.0).
; -----------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; Denormal inputs (0xH03FF and 0xH83FF). The expected immediate keeps the
; input's low 16 bits in every variant, i.e. the value is not flushed.
; The "default_denormals" variants use attribute group #1 and the
; "denormals" variants use #3.
; NOTE(review): #1 and #3 are textually identical at the bottom of this file,
; so both variants currently compile under the same attributes - confirm
; that this is intended.
; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; NOTE(review): despite the "qnan" name, the input 0xH7C00 looks like the
; half +infinity encoding rather than a NaN; the check expects the same bit
; pattern back unchanged. Confirm the name/constant pairing is intended.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; NaN inputs. Every test from here to the end of the scalar constant tests
; expects the single result 0x7e00, whatever the input's payload or sign bit.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
; GFX89: {{flat|global}}_store_short v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
  store half %canonicalized, half addrspace(1)* %out
  ret void
}

; -----------------------------------------------------------------------------
; <2 x half>: variable operands
; The v_* kernels load element %tid (the workitem id) of %out and store the
; result to %out itself. gfx900 is expected to use one packed v_pk_max_f16;
; tonga is expected to canonicalize the two halves with separate v_max_f16
; instructions (an SDWA form writing the high word).
; -----------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+$}}
; GFX9: global_store_dword v{{.+}}, [[REG]], s
define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; fabs of both elements: gfx900 is expected to clear both sign bits with one
; v_and_b32 by 0x7fff7fff before the packed max; tonga is expected to use
; |x| operand modifiers on each half.
; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16:
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], |v{{[0-9]+}}|, |v{{[0-9]+}}| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_max_f16_e64 [[REG1:v[0-9]+]], |v{{[0-9]+}}|, |v{{[0-9]+}}|
; VI-NOT: 0xffff
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]]{{$}}
; GFX89: {{flat|global}}_store_dword
define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; fneg(fabs(x)): gfx900 is expected to keep the v_and_b32 for the fabs and
; fold the fneg into neg_lo/neg_hi on the packed max. kaveri is expected to
; convert both elements to f32 and multiply each by 1.0.
; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], -|v{{[0-9]+}}|, -|v{{[0-9]+}}| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e64 [[REG1:v[0-9]+]], -|v{{[0-9]+}}|, -|v{{[0-9]+}}|
; VI: v_or_b32

; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1]{{$}}
; GFX89: {{flat|global}}_store_dword

; CI: v_cvt_f32_f16
; CI: v_cvt_f32_f16
; CI: v_mul_f32_e32 v{{[0-9]+}}, 1.0
; CI: v_mul_f32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %val.fabs.fneg = fneg <2 x half> %val.fabs
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG1:v[0-9]+]], -v{{[0-9]+}}, -v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e64 [[REG0:v[0-9]+]], -v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: 0xffff

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} neg_lo:[1,1] neg_hi:[1,1]{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[REG]], s
define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
  %fneg.val = fneg <2 x half> %val
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Operand supplied as a kernel argument (an i32 bitcast to <2 x half>).
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
; VI-NOT: v_and_b32

; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[REG]], s
define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
  %val = bitcast i32 %val.arg to <2 x half>
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; -----------------------------------------------------------------------------
; <2 x half>: constant operands
; Vector counterparts of the scalar constant tests above: both elements hold
; the same constant and the expected 32-bit immediate repeats the expected
; 16-bit result in both halves. Only tonga and gfx900 check the value.
; -----------------------------------------------------------------------------

; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Denormal vector constants; the expected immediates keep the input bits.
; NOTE(review): the "no_denormals" variants here use attribute group #1, not
; #2 as the scalar v_test_no_denormals_* tests do - confirm this is intended.
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; NOTE(review): as in the scalar test of the same name, 0xH7C00 looks like
; +infinity rather than a NaN; the check expects it back unchanged.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; NaN vector constants: all of the following expect 0x7e007e00.
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
; GFX89: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; -----------------------------------------------------------------------------
; <3 x half> and <4 x half> variable operands
; These are ordinary (non-kernel) functions, so the checks name the fixed
; argument/return registers v0 and v1 directly.
; -----------------------------------------------------------------------------

; FIXME: Extra 4th component handled
; GCN-LABEL: {{^}}v_test_canonicalize_var_v3f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64

; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0
; VI-DAG: v_max_f16_e32 v1, v1, v1
; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]]

; VI: s_setpc_b64
define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 {
  %canonicalized = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> %val)
  ret <3 x half> %canonicalized
}

; GCN-LABEL: {{^}}v_test_canonicalize_var_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64

; VI-DAG: v_max_f16_sdwa [[CANON_ELT3:v[0-9]+]], v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[CANON_ELT2:v[0-9]+]], v1, v1
; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0
; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]]
; VI-DAG: v_or_b32_e32 v1, [[CANON_ELT2]], [[CANON_ELT3]]
; VI: s_setpc_b64
define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 {
  %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %val)
  ret <4 x half> %canonicalized
}

; -----------------------------------------------------------------------------
; <2 x half>: partially or fully undef vectors
; -----------------------------------------------------------------------------

; Fully undef vector: the 0x7e007e00 immediate is checked on all three runs.
; GCN-LABEL: {{^}}s_test_canonicalize_undef_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00
; GFX89: {{flat|global}}_store_dword v{{.+}}, [[REG]]
define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(<2 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
  ret void
}

; Register in element 0, element 1 undef: only the low half is expected to be
; canonicalized. gfx900 additionally expects an explicit mask of the high
; bits; tonga expects none (see the existing notes before the tonga checks).
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_v2f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_setpc_b64

; High bits known zero
; FIXME: Should also be true on gfx9 by default?
; VI: s_waitcnt
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
  %vec = insertelement <2 x half> undef, half %val, i32 0
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; Element 0 undef, register in element 1: a single SDWA max writing the high
; word is expected on both tonga and gfx900.
; GCN-LABEL: {{^}}v_test_canonicalize_undef_reg_v2f16:
; GFX89: s_waitcnt
; GFX89-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX89-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
  %vec = insertelement <2 x half> undef, half %val, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; One constant element and one undef element (four tests). On tonga/gfx900
; the expected immediate repeats the constant in both halves, i.e. the undef
; element takes the constant's value. On kaveri the two elements are expected
; in separate registers, with 0x7fc00000 in the register of the undef element.
; GCN-LABEL: {{^}}v_test_canonicalize_undef_lo_imm_hi_v2f16:
; GCN: s_waitcnt
; GFX89-NEXT: v_mov_b32_e32 v0, 0x3c003c00
; GFX89-NEXT: s_setpc_b64

; CI-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; CI-NEXT: v_mov_b32_e32 v1, 1.0
; CI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_undef_lo_imm_hi_v2f16() #1 {
  %vec = insertelement <2 x half> undef, half 1.0, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; GCN-LABEL: {{^}}v_test_canonicalize_imm_lo_undef_hi_v2f16:
; GCN: s_waitcnt
; GFX89-NEXT: v_mov_b32_e32 v0, 0x3c003c00
; GFX89-NEXT: s_setpc_b64

; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; CI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_imm_lo_undef_hi_v2f16() #1 {
  %vec = insertelement <2 x half> undef, half 1.0, i32 0
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; GCN-LABEL: {{^}}v_test_canonicalize_undef_lo_k_hi_v2f16:
; GCN: s_waitcnt
; GFX89-NEXT: v_mov_b32_e32 v0, 0x4c004c00
; GFX89-NEXT: s_setpc_b64

; CI-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; CI-NEXT: v_mov_b32_e32 v1, 0x41800000
; CI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_undef_lo_k_hi_v2f16() #1 {
  %vec = insertelement <2 x half> undef, half 16.0, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; GCN-LABEL: {{^}}v_test_canonicalize_k_lo_undef_hi_v2f16:
; GCN: s_waitcnt
; GFX89-NEXT: v_mov_b32_e32 v0, 0x4c004c00
; GFX89-NEXT: s_setpc_b64

; CI-NEXT: v_mov_b32_e32 v0, 0x41800000
; CI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; CI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_k_lo_undef_hi_v2f16() #1 {
  %vec = insertelement <2 x half> undef, half 16.0, i32 0
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
  ret <2 x half> %canonicalized
}

; One register element and one constant (2.0) element: only the register
; element is expected to go through v_max_f16; the constant is combined in
; afterwards (v_or_b32 on tonga, v_lshl_or_b32 on gfx900).
; GCN-LABEL: {{^}}v_test_canonicalize_reg_k_v2f16:
; GFX9: s_waitcnt
; GFX9-DAG: v_max_f16_e32 v0, v0, v0
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4000
; GFX9: v_and_b32_e32 v0, 0xffff, v0
; GFX9: v_lshl_or_b32 v0, [[K]], 16, v0
; GFX9: s_setpc_b64

; VI: s_waitcnt
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_or_b32_e32 v0, 2.0, v0
; VI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 {
  %vec0 = insertelement <2 x half> undef, half %val, i32 0
  %vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
  ret <2 x half> %canonicalized
}

; GCN-LABEL: {{^}}v_test_canonicalize_k_reg_v2f16:
; GFX9: v_max_f16_e32 v0, v0, v0
; GFX9: v_mov_b32_e32 [[K:v[0-9]+]], 0x4000
; GFX9: v_lshl_or_b32 v0, v0, 16, [[K]]
; GFX9: s_setpc_b64

; VI: s_waitcnt
; VI-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, 0x4000, v0
; VI-NEXT: s_setpc_b64
define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 {
  %vec0 = insertelement <2 x half> undef, half 2.0, i32 0
  %vec1 = insertelement <2 x half> %vec0, half %val, i32 1
  %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
  ret <2 x half> %canonicalized
}

; -----------------------------------------------------------------------------
; <4 x half>: partially or fully undef vectors
; -----------------------------------------------------------------------------

; Fully undef <4 x half>: the constant is expected to be materialized once
; and copied into the second register; checked on all three runs.
; GCN-LABEL: {{^}}s_test_canonicalize_undef_v4f16:
; GCN: v_mov_b32_e32 v0, 0x7e007e00
; GCN: v_mov_b32_e32 v1, v0
define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(<4 x half> addrspace(1)* %out) #1 {
  %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
  store <4 x half> %canonicalized, <4 x half> addrspace(1)* %out
  ret void
}

; Only element 0 defined. The two targets are expected to fill the undef
; elements differently: gfx900 with zero bits (mask of v0, v1 = 0), tonga
; with 0x7e00 in each undef half.
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_undef_undef_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64

; VI: s_waitcnt
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
; VI-NEXT: v_mov_b32_e32 v1, 0x7e007e00
; VI-NEXT: s_setpc_b64
define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1 {
  %vec = insertelement <4 x half> undef, half %val, i32 0
  %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec)
  ret <4 x half> %canonicalized
}

; Elements 0 and 1 defined, elements 2 and 3 undef.
; GCN-LABEL: {{^}}v_test_canonicalize_reg_reg_undef_undef_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64

; VI: s_waitcnt
; VI-DAG: v_max_f16_e32 v0, v0, v0
; VI-DAG: v_max_f16_sdwa v1, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_mov_b32_e32 v1, 0x7e007e00
; VI-NEXT: s_setpc_b64
define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, half %val1) #1 {
  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
  %vec1 = insertelement <4 x half> %vec0, half %val1, i32 1
  %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1)
  ret <4 x half> %canonicalized
}

; Elements 0, 2 and 3 defined, element 1 undef (%val1 goes to index 2 and
; %val2 to index 3).
; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16:
; GFX9: s_waitcnt
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: s_setpc_b64

; VI: s_waitcnt
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_max_f16_e32 v1, v1, v1
; VI-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
; VI-NEXT: v_or_b32_e32 v1, v1, v2
; VI-NEXT: s_setpc_b64
define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half %val1, half %val2) #1 {
  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
  %vec1 = insertelement <4 x half> %vec0, half %val1, i32 2
  %vec2 = insertelement <4 x half> %vec1, half %val2, i32 3
  %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec2)
  ret <4 x half> %canonicalized
}

; Attribute groups used above:
;   #0  intrinsic declarations
;   #1  most tests; sets only the f32-specific denormal mode
;   #2  the v_test_no_denormals_* tests; sets "denormal-fp-math" itself
;   #3  the test_denormals_* tests
; NOTE(review): #3 is textually identical to #1 - confirm whether a distinct
; setting was intended for the test_denormals_* variants.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }