// SPDX-License-Identifier: GPL-2.0
/* MPTCP socket monitoring support
 *
 * Copyright (c) 2020 Red Hat
 *
 * Author: Paolo Abeni <pabeni@redhat.com>
 */

#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/inet_diag.h>
#include <net/netlink.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"

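/* Dump a single MPTCP socket, after matching it against the optional
 * bytecode filter carried by the request.
 */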
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *req,
			struct nlattr *bc, bool net_admin)
{
	if (!inet_diag_bc_sk(bc, sk))
		return 0;

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
				 net_admin);
}

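/* Handle a "dump one" request: look up the MPTCP socket by its token,
 * carried in the first 32 bits of the request cookie, and unicast a
 * single diag reply to the requesting socket.
 */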
static int mptcp_diag_dump_one(struct netlink_callback *cb,
			       const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	struct mptcp_sock *msk = NULL;
	struct sk_buff *rep;
	int err = -ENOENT;
	struct net *net;
	struct sock *sk;

	net = sock_net(in_skb->sk);
	msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
	if (!msk)
		goto out_nosk;

	err = -ENOMEM;
	sk = (struct sock *)msk;
	rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
			inet_diag_msg_attrs_size() +
			nla_total_size(sizeof(struct mptcp_info)) +
			nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
			GFP_KERNEL);
	if (!rep)
		goto out;

	err = inet_sk_diag_fill(sk, inet_csk(sk), rep, cb, req, 0,
				netlink_net_capable(in_skb, CAP_NET_ADMIN));
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	sock_put(sk);

out_nosk:
	return err;
}

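/* Dump iteration state, stored in the netlink callback context so that an
 * interrupted dump can resume: s_slot/s_num track the token hash walk,
 * l_slot/l_num the listening hash walk.
 */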
struct mptcp_diag_ctx {
	long s_slot;
	long s_num;
	unsigned int l_slot;
	unsigned int l_num;
};

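/* MPTCP listeners are not reachable via the token hash; find them by
 * walking the TCP listening hash for subflows using the "mptcp" ULP and
 * dump the owning MPTCP socket instead.
 */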
static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
				      const struct inet_diag_req_v2 *r,
				      bool net_admin)
{
	struct inet_diag_dump_data *cb_data = cb->data;
	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
	struct nlattr *bc = cb_data->inet_diag_nla_bc;
	struct net *net = sock_net(skb->sk);
	int i;

	for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) {
		struct inet_listen_hashbucket *ilb;
		struct hlist_nulls_node *node;
		struct sock *sk;
		int num = 0;

		ilb = &tcp_hashinfo.listening_hash[i];

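		/* Walk the bucket under RCU plus the bucket lock; if the
		 * netlink skb fills up, save the current position in the
		 * context and resume from it on the next dump callback.
		 */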
		rcu_read_lock();
		spin_lock(&ilb->lock);
		sk_nulls_for_each(sk, node, &ilb->nulls_head) {
			const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
			struct inet_sock *inet = inet_sk(sk);
			int ret;

			if (num < diag_ctx->l_num)
				goto next_listen;

			if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
				goto next_listen;

			sk = ctx->conn;
			if (!sk || !net_eq(sock_net(sk), net))
				goto next_listen;

			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_listen;

			if (r->id.idiag_sport != inet->inet_sport &&
			    r->id.idiag_sport)
				goto next_listen;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_listen;

			ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);

			sock_put(sk);

			if (ret < 0) {
				spin_unlock(&ilb->lock);
				rcu_read_unlock();
				diag_ctx->l_slot = i;
				diag_ctx->l_num = num;
				return;
			}
			diag_ctx->l_num = num + 1;
			num = 0;
next_listen:
			++num;
		}
		spin_unlock(&ilb->lock);
		rcu_read_unlock();

		cond_resched();
		diag_ctx->l_num = 0;
	}

	diag_ctx->l_num = 0;
	diag_ctx->l_slot = i;
}

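/* Dump all MPTCP sockets matching the request: sockets carrying a token
 * are walked via the token hash, listening sockets via the TCP listening
 * hash.
 */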
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
	struct net *net = sock_net(skb->sk);
	struct inet_diag_dump_data *cb_data;
	struct mptcp_sock *msk;
	struct nlattr *bc;

	BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));

	cb_data = cb->data;
	bc = cb_data->inet_diag_nla_bc;

	while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
					    &diag_ctx->s_num)) != NULL) {
		struct inet_sock *inet = (struct inet_sock *)msk;
		struct sock *sk = (struct sock *)msk;
		int ret = 0;

		if (!(r->idiag_states & (1 << sk->sk_state)))
			goto next;
		if (r->sdiag_family != AF_UNSPEC &&
		    sk->sk_family != r->sdiag_family)
			goto next;
		if (r->id.idiag_sport != inet->inet_sport &&
		    r->id.idiag_sport)
			goto next;
		if (r->id.idiag_dport != inet->inet_dport &&
		    r->id.idiag_dport)
			goto next;

		ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
next:
		sock_put(sk);
		if (ret < 0) {
			/* will retry on the same position */
			diag_ctx->s_num--;
			break;
		}
		cond_resched();
	}

	if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
		mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}

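/* Fill the INET_DIAG_INFO payload (struct mptcp_info) and the queue
 * counters; for listeners, Recv-Q/Send-Q report the accept queue usage
 * and limit instead.
 */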
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
				void *_info)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_info *info = _info;
	u32 flags = 0;
	bool slow;
	u8 val;

	r->idiag_rqueue = sk_rmem_alloc_get(sk);
	r->idiag_wqueue = sk_wmem_alloc_get(sk);

	if (inet_sk_state_load(sk) == TCP_LISTEN) {
		struct sock *lsk = READ_ONCE(msk->first);

		if (lsk) {
			/* override with settings from tcp listener,
			 * so Send-Q will show accept queue.
			 */
			r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
			r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
		}
	}

	if (!info)
		return;

	slow = lock_sock_fast(sk);
	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
	val = mptcp_pm_get_add_addr_signal_max(msk);
	info->mptcpi_add_addr_signal_max = val;
	val = mptcp_pm_get_add_addr_accept_max(msk);
	info->mptcpi_add_addr_accepted_max = val;
	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
		flags |= MPTCP_INFO_FLAG_FALLBACK;
	if (READ_ONCE(msk->can_ack))
		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
	info->mptcpi_flags = flags;
	info->mptcpi_token = READ_ONCE(msk->token);
	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
	unlock_sock_fast(sk, slow);
}

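/* inet_diag hooks servicing IPPROTO_MPTCP sock_diag requests */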
static const struct inet_diag_handler mptcp_diag_handler = {
	.dump = mptcp_diag_dump,
	.dump_one = mptcp_diag_dump_one,
	.idiag_get_info = mptcp_diag_get_info,
	.idiag_type = IPPROTO_MPTCP,
	.idiag_info_size = sizeof(struct mptcp_info),
};

static int __init mptcp_diag_init(void)
{
	return inet_diag_register(&mptcp_diag_handler);
}

static void __exit mptcp_diag_exit(void)
{
	inet_diag_unregister(&mptcp_diag_handler);
}

module_init(mptcp_diag_init);
module_exit(mptcp_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);