1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s 2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s 3 4declare float @llvm.maxnum.f32(float, float) #0 5declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 6declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0 7declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0 8declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0 9 10declare double @llvm.maxnum.f64(double, double) 11 12; FUNC-LABEL: @test_fmax_f32 13; SI: v_max_f32_e32 14 15; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 16; EG: MAX_DX10 {{.*}}[[OUT]] 17define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind { 18 %val = call float @llvm.maxnum.f32(float %a, float %b) #0 19 store float %val, float addrspace(1)* %out, align 4 20 ret void 21} 22 23; FUNC-LABEL: @test_fmax_v2f32 24; SI: v_max_f32_e32 25; SI: v_max_f32_e32 26 27; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] 28; EG: MAX_DX10 {{.*}}[[OUT]] 29; EG: MAX_DX10 {{.*}}[[OUT]] 30define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind { 31 %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0 32 store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 33 ret void 34} 35 36; FUNC-LABEL: @test_fmax_v4f32 37; SI: v_max_f32_e32 38; SI: v_max_f32_e32 39; SI: v_max_f32_e32 40; SI: v_max_f32_e32 41 42; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] 43; EG: MAX_DX10 {{.*}}[[OUT]] 44; EG: MAX_DX10 {{.*}}[[OUT]] 45; EG: MAX_DX10 {{.*}}[[OUT]] 46; EG: MAX_DX10 {{.*}}[[OUT]] 47define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind { 48 %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0 49 store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 50 ret void 51} 52 53; FUNC-LABEL: @test_fmax_v8f32 54; SI: v_max_f32_e32 55; SI: v_max_f32_e32 56; SI: v_max_f32_e32 57; SI: v_max_f32_e32 58; SI: v_max_f32_e32 59; SI: v_max_f32_e32 60; SI: v_max_f32_e32 61; SI: v_max_f32_e32 62 63; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] 64; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] 65; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X 66; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y 67; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z 68; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W 69; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X 70; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y 71; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z 72; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W 73define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind { 74 %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0 75 store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 76 ret void 77} 78 79; FUNC-LABEL: @test_fmax_v16f32 80; SI: v_max_f32_e32 81; SI: v_max_f32_e32 82; SI: v_max_f32_e32 83; SI: v_max_f32_e32 84; SI: v_max_f32_e32 85; SI: v_max_f32_e32 86; SI: v_max_f32_e32 87; SI: v_max_f32_e32 88; SI: v_max_f32_e32 89; SI: v_max_f32_e32 90; SI: v_max_f32_e32 91; SI: v_max_f32_e32 92; SI: v_max_f32_e32 93; SI: v_max_f32_e32 94; SI: v_max_f32_e32 95; SI: v_max_f32_e32 96 97; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] 98; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] 99; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]] 100; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]] 101; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X 102; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y 103; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z 104; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W 105; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X 106; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y 107; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z 108; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W 109; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X 110; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y 111; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z 112; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W 113; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X 114; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y 115; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z 116; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W 117define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind { 118 %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0 119 store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 120 ret void 121} 122 123; FUNC-LABEL: @constant_fold_fmax_f32 124; SI-NOT: v_max_f32_e32 125; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0 126; SI: buffer_store_dword [[REG]] 127 128; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 129; EG-NOT: MAX_DX10 130; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 131define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind { 132 %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0 133 store float %val, float addrspace(1)* %out, align 4 134 ret void 135} 136 137; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan 138; SI-NOT: v_max_f32_e32 139; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000 140; SI: buffer_store_dword [[REG]] 141 142; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 143; EG-NOT: MAX_DX10 144; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 145; EG: 2143289344(nan) 146define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind { 147 %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0 148 store float %val, float addrspace(1)* %out, align 4 149 ret void 150} 151 152; FUNC-LABEL: @constant_fold_fmax_f32_val_nan 153; SI-NOT: v_max_f32_e32 154; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 155; SI: buffer_store_dword [[REG]] 156 157; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 158; EG-NOT: MAX_DX10 159; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 160define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind { 161 %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0 162 store float %val, float addrspace(1)* %out, align 4 163 ret void 164} 165 166; FUNC-LABEL: @constant_fold_fmax_f32_nan_val 167; SI-NOT: v_max_f32_e32 168; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 169; SI: buffer_store_dword [[REG]] 170 171; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 172; EG-NOT: MAX_DX10 173; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 174define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind { 175 %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0 176 store float %val, float addrspace(1)* %out, align 4 177 ret void 178} 179 180; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0 181; SI-NOT: v_max_f32_e32 182; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 183; SI: buffer_store_dword [[REG]] 184 185; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 186; EG-NOT: MAX_DX10 187; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 188define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind { 189 %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0 190 store float %val, float addrspace(1)* %out, align 4 191 ret void 192} 193 194; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0 195; SI-NOT: v_max_f32_e32 196; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 197; SI: buffer_store_dword [[REG]] 198 199; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 200; EG-NOT: MAX_DX10 201; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 202define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind { 203 %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0 204 store float %val, float addrspace(1)* %out, align 4 205 ret void 206} 207 208; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0 209; SI-NOT: v_max_f32_e32 210; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}} 211; SI: buffer_store_dword [[REG]] 212 213; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 214; EG-NOT: MAX_DX10 215; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 216define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind { 217 %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0 218 store float %val, float addrspace(1)* %out, align 4 219 ret void 220} 221 222; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0 223; SI-NOT: v_max_f32_e32 224; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}} 225; SI: buffer_store_dword [[REG]] 226 227; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 228; EG-NOT: MAX_DX10 229; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 230define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind { 231 %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0 232 store float %val, float addrspace(1)* %out, align 4 233 ret void 234} 235 236; FUNC-LABEL: @fmax_var_immediate_f32 237; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} 238 239; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 240; EG-NOT: MAX_DX10 241; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} 242define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind { 243 %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0 244 store float %val, float addrspace(1)* %out, align 4 245 ret void 246} 247 248; FUNC-LABEL: @fmax_immediate_var_f32 249; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} 250 251; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 252; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 253define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind { 254 %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0 255 store float %val, float addrspace(1)* %out, align 4 256 ret void 257} 258 259; FUNC-LABEL: @fmax_var_literal_f32 260; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 261; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] 262 263; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 264; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 265define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { 266 %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0 267 store float %val, float addrspace(1)* %out, align 4 268 ret void 269} 270 271; FUNC-LABEL: @fmax_literal_var_f32 272; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 273; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] 274 275; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] 276; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} 277define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind { 278 %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0 279 store float %val, float addrspace(1)* %out, align 4 280 ret void 281} 282 283attributes #0 = { nounwind readnone } 284