; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN,VI %s

; Codegen checks for fptosi from half (scalar and <2 x half>) to i16, i32 and
; i64. Directives with the GCN prefix apply to both runs above; the SI prefix
; applies only to the first run (no -mcpu given) and the VI prefix only to the
; second run (-mcpu=fiji).

; Scalar half to i16: the value is loaded as 16 bits, widened to f32,
; converted to an integer, and the result is stored as 16 bits.

; GCN-LABEL: {{^}}fptosi_f16_to_i16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_i32_f32_e32 v[[R_I16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_I16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i16(
    i16 addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = fptosi half %a.val to i16
  store i16 %r.val, i16 addrspace(1)* %r
  ret void
}

; Scalar half to i32: same widen-then-convert sequence as above, but the
; result is stored as a full dword.

; GCN-LABEL: {{^}}fptosi_f16_to_i32:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_i32_f32_e32 v[[R_I32:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i32(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = fptosi half %a.val to i32
  store i32 %r.val, i32 addrspace(1)* %r
  ret void
}

; Need to make sure we promote f16 to f32 when converting f16 to i64. Existing
; test checks code generated for 'i64 = fp_to_sint f32'.
; Only the load and the f16-to-f32 widening are checked here.

; GCN-LABEL: {{^}}fptosi_f16_to_i64:
; GCN: buffer_load_ushort
; GCN: v_cvt_f32_f16_e32
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i64(
    i64 addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = fptosi half %a.val to i64
  store i64 %r.val, i64 addrspace(1)* %r
  ret void
}

; <2 x half> to <2 x i16>: both halves arrive in one dword and both results
; are stored as one dword. The SI run is expected to split the high half off
; with a 16-bit right shift and to repack the results with and/shift/or. The
; VI run is expected to use SDWA operand selects (WORD_0 / WORD_1) instead.

; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i16:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]

; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
; SI-DAG: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]]
; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_LO]], v[[R_I16_HI]]

; VI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

; GCN: buffer_store_dword v[[R_V2_I16]]
; GCN: s_endpgm

define amdgpu_kernel void @fptosi_v2f16_to_v2i16(
    <2 x i16> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fptosi <2 x half> %a.val to <2 x i16>
  store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r
  ret void
}

; <2 x half> to <2 x i32>: two f16-to-f32 widenings (the second one is the
; SDWA form in the VI run), two f32-to-i32 conversions, and a single 64-bit
; store of the result pair.

; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i32:
; GCN: buffer_load_dword
; GCN: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; VI: v_cvt_f32_f16_sdwa
; GCN: v_cvt_i32_f32_e32
; GCN: v_cvt_i32_f32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i32(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fptosi <2 x half> %a.val to <2 x i32>
  store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r
  ret void
}

; Need to make sure we promote f16 to f32 when converting f16 to i64. Existing
; test checks code generated for 'i64 = fp_to_sint f32'.
; Only the load and the two f16-to-f32 widenings are checked here.

; GCN-LABEL: {{^}}fptosi_v2f16_to_v2i64:
; GCN: buffer_load_dword
; GCN: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; VI: v_cvt_f32_f16_sdwa
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i64(
    <2 x i64> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fptosi <2 x half> %a.val to <2 x i64>
  store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
  ret void
}