; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

; Test selection of the llvm.amdgcn.class.f16 intrinsic on VI: the compare
; should select to v_cmp_class_f16, fold fabs/fneg into source modifiers, and
; use an inline constant for class masks that fit one.

declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)

; GCN-LABEL: {{^}}class_f16:
; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN-DAG: buffer_load_dword v[[B_I32:[0-9]+]]
; VI: v_cmp_class_f16_e32 vcc, v[[A_F16]], v[[B_I32]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    i32 addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load i32, i32 addrspace(1)* %b
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_fabs:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |s[[SA_F16]]|, [[V_B_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fabs(
    i32 addrspace(1)* %r,
    [8 x i32],
    half %a.val,
    [8 x i32],
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_fneg:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -s[[SA_F16]], [[V_B_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fneg(
    i32 addrspace(1)* %r,
    [8 x i32],
    half %a.val,
    [8 x i32],
    i32 %b.val) {
entry:
  %a.val.fneg = fsub half -0.0, %a.val
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_fabs_fneg:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; GCN: s_load_dword s[[SB_I32:[0-9]+]]
; GCN: v_mov_b32_e32 [[V_B_I32:v[0-9]+]], s[[SB_I32]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|s[[SA_F16]]|, [[V_B_I32]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_fabs_fneg(
    i32 addrspace(1)* %r,
    [8 x i32],
    half %a.val,
    [8 x i32],
    i32 %b.val) {
entry:
  %a.val.fabs = call half @llvm.fabs.f16(half %a.val)
  %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_1:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 1{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_1(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_64:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], s[[SA_F16]], 64{{$}}
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_64(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; The full 10-bit mask 0x3ff does not fit in an inline constant, so it has to
; be materialized in a VGPR first.
; GCN-LABEL: {{^}}class_f16_full_mask:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x3ff{{$}}
; VI: v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_full_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}class_f16_nine_bit_mask:
; GCN: s_load_dword s[[SA_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[MASK:[0-9]+]], 0x1ff{{$}}
; VI: v_cmp_class_f16_e32 vcc, s[[SA_F16]], v[[MASK]]
; VI: v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, vcc
; GCN: buffer_store_dword v[[VR_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @class_f16_nine_bit_mask(
    i32 addrspace(1)* %r,
    half %a.val) {
entry:
  %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511)
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}