; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,SI
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=GCN,FUNC,VI
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=EG -check-prefix=FUNC

; Code generation checks for the fptoui instruction with a float (f32) source
; and i1, i16, i32 and i64 scalar/vector destinations. Three llc
; configurations are exercised: amdgcn with the default CPU, amdgcn with
; -mcpu=tonga, and r600 with -mcpu=redwood.

declare float @llvm.fabs.f32(float) #1

; Scalar f32 -> i32 conversion of a kernel argument.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}

; GCN: v_cvt_u32_f32_e32
; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_f32_to_i32(i32 addrspace(1)* %out, float %in) {
  %conv = fptoui float %in to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; <2 x float> -> <2 x i32>: one conversion instruction is expected per element.
; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
  %result = fptoui <2 x float> %in to <2 x i32>
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x float> -> <4 x i32>, with the source vector loaded from global memory.
; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32
; GCN: v_cvt_u32_f32_e32

define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %value = load <4 x float>, <4 x float> addrspace(1)* %in
  %result = fptoui <4 x float> %value to <4 x i32>
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; Scalar f32 -> i64. On r600 the checks expect an expanded sequence of integer
; operations rather than a single conversion instruction.
; NOTE(review): this test and the two i64 vector tests below use the plain
; form of the function-name check instead of the -LABEL form used by the other
; tests in this file; left unchanged here -- confirm whether that is intended.
; FUNC: {{^}}fp_to_uint_f32_to_i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) {
  %conv = fptoui float %x to i64
  store i64 %conv, i64 addrspace(1)* %out
  ret void
}

; <2 x float> -> <2 x i64>: the expanded r600 check sequence is listed once per
; element (two times).
; FUNC: {{^}}fp_to_uint_v2f32_to_v2i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
  %conv = fptoui <2 x float> %x to <2 x i64>
  store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x float> -> <4 x i64>: the expanded r600 check sequence is listed once per
; element (four times).
; FUNC: {{^}}fp_to_uint_v4f32_to_v4i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
; EG-DAG: AND_INT
; EG-DAG: ASHR
; EG-DAG: AND_INT
; EG-DAG: OR_INT
; EG-DAG: SUB_INT
; EG-DAG: LSHL
; EG-DAG: LSHL
; EG-DAG: SUB_INT
; EG-DAG: LSHR
; EG-DAG: LSHR
; EG-DAG: SETGT_UINT
; EG-DAG: SETGT_INT
; EG-DAG: XOR_INT
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; GCN: s_endpgm
define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
  %conv = fptoui <4 x float> %x to <4 x i64>
  store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
  ret void
}


; f32 -> i1: the checks expect the conversion to be emitted as an equality
; compare of the input against 1.0.
; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}

; EG: AND_INT
; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0,
define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
  %conv = fptoui float %in to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; fabs(f32) -> i1: same compare as above, with the absolute value expected to
; appear as a |...| source modifier on the compare operand.
; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}|
define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptoui float %in.fabs to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i16:
; Different instructions are used on SI and VI because on SI fp_to_uint is
; legalized by the type legalizer, while on VI it is legalized by the DAG
; legalizer, and the two legalize fp_to_uint differently.
; SI: v_cvt_u32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; VI: v_cvt_i32_f32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; GCN: buffer_store_short [[VAL]]
define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 {
  %uint = fptoui float %in to i16
  store i16 %uint, i16 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }