; Checks which operand form llc selects for the constant argument of
; v_pk_max_f16 when llvm.maxnum.v2f16 is called with a constant <2 x half>
; vector. The same input is compiled for gfx900 and gfx1010.
;
; Every kernel below has the same shape: it indexes %arg by the workitem id,
; loads a <2 x half>, takes maxnum against a constant vector, and stores the
; result back to the same address. Only the constant vector differs; the kernel
; name suffix lists its elements in IR order (m1 = -1.0, 41c8/42ca = raw half
; bit patterns).

; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s

; Constant <0.0, 1.0>: expected as the operand 1.0 plus op_sel/op_sel_hi modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH3C00>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <1.0, 0.0>: expected as the bare operand 1.0 with no modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <1.0, 1.0>: expected as the operand 1.0 with op_sel_hi:[1,0].
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH3C00>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <0.0, -1.0>: same expected form as the <0.0, 1.0> case, with -1.0.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xHBC00>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <-1.0, 0.0>: expected as the bare operand -1.0 with no modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xH0000>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <-1.0, -1.0>: expected as the operand -1.0 with op_sel_hi:[1,0].
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xHBC00, half 0xHBC00>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; All-zero constant: expected as the bare operand 0 with no modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH0000>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <0.0, 0xH41C8>: gfx900 is expected to move the packed value
; 0x41c80000 into an SGPR first; gfx1010 is expected to take 0x41c8 directly as
; a literal operand with op_sel modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <0xH41C8, 0.0>: gfx900 is expected to use s_movk_i32 with 0x41c8;
; gfx1010 is expected to take 0x41c8 as a literal operand with no modifiers.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0000>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Constant <0xH42CA, 0xH41C8>: both targets are expected to use the full packed
; 32-bit value 0x41c842ca, via an SGPR on gfx900 and as a literal operand on
; gfx1010.
; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
  %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
  %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
  store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
  ret void
}

; Intrinsics used by every kernel above.
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
declare i32 @llvm.amdgcn.workitem.id.x()