; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for the llvm.ceil intrinsic on f32 and on f32 vectors of
; 2, 3, 4, 8 and 16 elements. Every kernel below has the same shape: it
; applies llvm.ceil to its %x argument and stores the result through %out.
;
; Both amdgcn invocations above share one check prefix and expect one
; v_ceil_f32_e32 per vector element. The r600 invocation expects the
; MEM_RAT_CACHELESS STORE_RAW instruction(s) first, and then CEIL
; instructions that write the register(s) captured from those stores.

declare float @llvm.ceil.f32(float) nounwind readnone
declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone

; Scalar case: a single ceil. The r600 pattern captures the full stored
; operand, register and channel, and requires CEIL to write exactly that.
; FUNC-LABEL: {{^}}fceil_f32:
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_f32(float addrspace(1)* %out, float %x) {
  %y = call float @llvm.ceil.f32(float %x) nounwind readnone
  store float %y, float addrspace(1)* %out
  ret void
}

; Two elements: two ceils. From here on only the register number is
; captured; the channel suffix is matched separately and not reused.
; FUNC-LABEL: {{^}}fceil_v2f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
  %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
  store <2 x float> %y, <2 x float> addrspace(1)* %out
  ret void
}

; Three elements. The amdgcn lines below carry a FIXME- prefix that none of
; the FileCheck invocations enable, so they are currently not checked.
; FUNC-LABEL: {{^}}fceil_v3f32:
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
  %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
  store <3 x float> %y, <3 x float> addrspace(1)* %out
  ret void
}

; Four elements: four ceils, one store on r600.
; FUNC-LABEL: {{^}}fceil_v4f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
define amdgpu_kernel void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
  %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
  store <4 x float> %y, <4 x float> addrspace(1)* %out
  ret void
}

; Eight elements: eight ceils. On r600 two stores are expected, with four
; CEILs per stored register, matched in any order.
; FUNC-LABEL: {{^}}fceil_v8f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
define amdgpu_kernel void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
  %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
  store <8 x float> %y, <8 x float> addrspace(1)* %out
  ret void
}

; Sixteen elements: sixteen ceils. On r600 four stores are expected, with
; four CEILs per stored register, matched in any order.
; FUNC-LABEL: {{^}}fceil_v16f32:
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
define amdgpu_kernel void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
  %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
  store <16 x float> %y, <16 x float> addrspace(1)* %out
  ret void
}