• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Management Component Transport Protocol (MCTP) - routing
4  * implementation.
5  *
6  * This is currently based on a simple routing table, with no dst cache. The
7  * number of routes should stay fairly small, so the lookup cost is small.
8  *
9  * Copyright (c) 2021 Code Construct
10  * Copyright (c) 2021 Google
11  */
12 
13 #include <linux/idr.h>
14 #include <linux/mctp.h>
15 #include <linux/netdevice.h>
16 #include <linux/rtnetlink.h>
17 #include <linux/skbuff.h>
18 
19 #include <uapi/linux/if_arp.h>
20 
21 #include <net/mctp.h>
22 #include <net/mctpdevice.h>
23 #include <net/netlink.h>
24 #include <net/sock.h>
25 
/* Upper bound on a fully-reassembled message; fragments that would grow a
 * reassembly beyond this are rejected in mctp_frag_queue().
 */
static const unsigned int mctp_message_maxlen = 64 * 1024;
27 
28 /* route output callbacks */
/* Default route output handler, installed by mctp_route_alloc() before a
 * real handler is assigned: consume and drop the skb.
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
        kfree_skb(skb);
        return 0;
}
34 
/* Find a socket bound to the (network, message type, dest EID) of the
 * incoming skb. Caller must hold the RCU read lock; the returned socket
 * pointer is only valid within that read-side section. Returns NULL if
 * no bind matches.
 */
static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
        struct mctp_skb_cb *cb = mctp_cb(skb);
        struct mctp_hdr *mh;
        struct sock *sk;
        u8 type;

        WARN_ON(!rcu_read_lock_held());

        /* TODO: look up in skb->cb? */
        mh = mctp_hdr(skb);

        /* need at least one linear byte for the message type */
        if (!skb_headlen(skb))
                return NULL;

        /* message type is the first byte of the payload; mask off the
         * top (IC) bit
         */
        type = (*(u8 *)skb->data) & 0x7f;

        sk_for_each_rcu(sk, &net->mctp.binds) {
                struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

                /* bind may be restricted to a specific network */
                if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
                        continue;

                if (msk->bind_type != type)
                        continue;

                /* bind may be restricted to a specific local EID */
                if (msk->bind_addr != MCTP_ADDR_ANY &&
                    msk->bind_addr != mh->dest)
                        continue;

                return msk;
        }

        return NULL;
}
70 
mctp_key_match(struct mctp_sk_key * key,mctp_eid_t local,mctp_eid_t peer,u8 tag)71 static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
72 			   mctp_eid_t peer, u8 tag)
73 {
74 	if (key->local_addr != local)
75 		return false;
76 
77 	if (key->peer_addr != peer)
78 		return false;
79 
80 	if (key->tag != tag)
81 		return false;
82 
83 	return true;
84 }
85 
/* Look up a reassembly/response key matching the skb: local address is the
 * header's dest EID, peer is @peer, and the tag (including the TO bit) is
 * taken from the header. Caller must hold the RCU read lock. Returns NULL
 * if no key matches.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
                                           mctp_eid_t peer)
{
        struct mctp_sk_key *key, *ret;
        struct mctp_hdr *mh;
        u8 tag;

        WARN_ON(!rcu_read_lock_held());

        mh = mctp_hdr(skb);
        /* tag match includes the TO bit */
        tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

        ret = NULL;

        hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
                if (mctp_key_match(key, mh->dest, peer, tag)) {
                        ret = key;
                        break;
                }
        }

        return ret;
}
109 
/* Allocate and initialise a key for (local, peer, tag), owned by @msk.
 * The key is not yet published on any list; the caller adds it via
 * mctp_key_add() or mctp_reserve_tag(). Returns NULL on allocation
 * failure.
 */
static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
                                          mctp_eid_t local, mctp_eid_t peer,
                                          u8 tag, gfp_t gfp)
{
        struct mctp_sk_key *key;

        key = kzalloc(sizeof(*key), gfp);
        if (!key)
                return NULL;

        key->peer_addr = peer;
        key->local_addr = local;
        key->tag = tag;
        key->sk = &msk->sk;
        spin_lock_init(&key->reasm_lock);

        return key;
}
128 
/* Publish @key on both the net-wide and per-socket key lists.
 *
 * Returns -EINVAL if the owning socket is already dead (its keys would
 * never be cleaned up), or -EEXIST if an equivalent (local, peer, tag)
 * key is already present.
 */
static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
        struct net *net = sock_net(&msk->sk);
        struct mctp_sk_key *tmp;
        unsigned long flags;
        int rc = 0;

        spin_lock_irqsave(&net->mctp.keys_lock, flags);

        if (sock_flag(&msk->sk, SOCK_DEAD)) {
                rc = -EINVAL;
                goto out_unlock;
        }

        /* reject duplicates; a second identical key would shadow the
         * first on lookup
         */
        hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
                if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
                                   key->tag)) {
                        rc = -EEXIST;
                        break;
                }
        }

        if (!rc) {
                hlist_add_head(&key->hlist, &net->mctp.keys);
                hlist_add_head(&key->sklist, &msk->keys);
        }

out_unlock:
        spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

        return rc;
}
161 
162 /* Must be called with key->reasm_lock, which it will release. Will schedule
163  * the key for an RCU free.
164  */
__mctp_key_unlock_drop(struct mctp_sk_key * key,struct net * net,unsigned long flags)165 static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
166 				   unsigned long flags)
167 	__releases(&key->reasm_lock)
168 {
169 	struct sk_buff *skb;
170 
171 	skb = key->reasm_head;
172 	key->reasm_head = NULL;
173 	key->reasm_dead = true;
174 	spin_unlock_irqrestore(&key->reasm_lock, flags);
175 
176 	spin_lock_irqsave(&net->mctp.keys_lock, flags);
177 	hlist_del_rcu(&key->hlist);
178 	hlist_del_rcu(&key->sklist);
179 	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
180 	kfree_rcu(key, rcu);
181 
182 	if (skb)
183 		kfree_skb(skb);
184 }
185 
/* Append one fragment to the reassembly state in @key.
 *
 * The first fragment establishes the reassembly head; subsequent fragments
 * are chained on its frag_list and must arrive in order within the 2-bit
 * MCTP sequence space. Returns -EINVAL on an out-of-sequence fragment, or
 * if the reassembled message would exceed mctp_message_maxlen; the caller
 * retains ownership of @skb on failure.
 */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
        struct mctp_hdr *hdr = mctp_hdr(skb);
        u8 exp_seq, this_seq;

        this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
                & MCTP_HDR_SEQ_MASK;

        if (!key->reasm_head) {
                /* first fragment: start a new chain */
                key->reasm_head = skb;
                key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
                key->last_seq = this_seq;
                return 0;
        }

        /* sequence numbers wrap within MCTP_HDR_SEQ_MASK */
        exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

        if (this_seq != exp_seq)
                return -EINVAL;

        /* bound the total reassembled message size */
        if (key->reasm_head->len + skb->len > mctp_message_maxlen)
                return -EINVAL;

        skb->next = NULL;
        skb->sk = NULL;
        *key->reasm_tailp = skb;
        key->reasm_tailp = &skb->next;

        key->last_seq = this_seq;

        /* account the fragment in the head skb's totals */
        key->reasm_head->data_len += skb->len;
        key->reasm_head->len += skb->len;
        key->reasm_head->truesize += skb->truesize;

        return 0;
}
222 
/* Input handler for locally-destined packets: find (or create) a matching
 * reassembly key, reassemble multi-fragment messages, and deliver complete
 * messages to the owning socket. The skb is consumed: delivered on success,
 * freed on error (rc != 0).
 *
 * NOTE(review): the sock_queue_rcv_skb() calls below ignore their return
 * value; if the socket receive queue rejects the skb it looks like the
 * skb (or reassembly head) is leaked rather than freed - confirm against
 * later upstream fixes.
 */
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct mctp_sk_key *key;
        struct mctp_sock *msk;
        struct mctp_hdr *mh;
        unsigned long f;
        u8 tag, flags;
        int rc;

        msk = NULL;
        rc = -EINVAL;

        /* we may be receiving a locally-routed packet; drop source sk
         * accounting
         */
        skb_orphan(skb);

        /* ensure we have enough data for a header and a type */
        if (skb->len < sizeof(struct mctp_hdr) + 1)
                goto out;

        /* grab header, advance data ptr */
        mh = mctp_hdr(skb);
        skb_pull(skb, sizeof(struct mctp_hdr));

        if (mh->ver != 1)
                goto out;

        flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
        tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

        rcu_read_lock();

        /* lookup socket / reasm context, exactly matching (src,dest,tag) */
        key = mctp_lookup_key(net, skb, mh->src);

        if (flags & MCTP_HDR_FLAG_SOM) {
                if (key) {
                        msk = container_of(key->sk, struct mctp_sock, sk);
                } else {
                        /* first response to a broadcast? do a more general
                         * key lookup to find the socket, but don't use this
                         * key for reassembly - we'll create a more specific
                         * one for future packets if required (ie, !EOM).
                         */
                        key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
                        if (key) {
                                msk = container_of(key->sk,
                                                   struct mctp_sock, sk);
                                key = NULL;
                        }
                }

                /* no key? fall back to a bound socket, but only for
                 * request (TO-set) messages
                 */
                if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
                        msk = mctp_lookup_bind(net, skb);

                if (!msk) {
                        rc = -ENOENT;
                        goto out_unlock;
                }

                /* single-packet message? deliver to socket, clean up any
                 * pending key.
                 */
                if (flags & MCTP_HDR_FLAG_EOM) {
                        sock_queue_rcv_skb(&msk->sk, skb);
                        if (key) {
                                spin_lock_irqsave(&key->reasm_lock, f);
                                /* we've hit a pending reassembly; not much we
                                 * can do but drop it
                                 */
                                __mctp_key_unlock_drop(key, net, f);
                        }
                        rc = 0;
                        goto out_unlock;
                }

                /* broadcast response or a bind() - create a key for further
                 * packets for this message
                 */
                if (!key) {
                        key = mctp_key_alloc(msk, mh->dest, mh->src,
                                             tag, GFP_ATOMIC);
                        if (!key) {
                                rc = -ENOMEM;
                                goto out_unlock;
                        }

                        /* we can queue without the reasm lock here, as the
                         * key isn't observable yet
                         */
                        mctp_frag_queue(key, skb);

                        /* if the key_add fails, we've raced with another
                         * SOM packet with the same src, dest and tag. There's
                         * no way to distinguish future packets, so all we
                         * can do is drop; we'll free the skb on exit from
                         * this function.
                         */
                        rc = mctp_key_add(key, msk);
                        if (rc)
                                kfree(key);

                } else {
                        /* existing key: start reassembly */
                        spin_lock_irqsave(&key->reasm_lock, f);

                        if (key->reasm_head || key->reasm_dead) {
                                /* duplicate start? drop everything */
                                __mctp_key_unlock_drop(key, net, f);
                                rc = -EEXIST;
                        } else {
                                rc = mctp_frag_queue(key, skb);
                                spin_unlock_irqrestore(&key->reasm_lock, f);
                        }
                }

        } else if (key) {
                /* this packet continues a previous message; reassemble
                 * using the message-specific key
                 */

                spin_lock_irqsave(&key->reasm_lock, f);

                /* we need to be continuing an existing reassembly... */
                if (!key->reasm_head)
                        rc = -EINVAL;
                else
                        rc = mctp_frag_queue(key, skb);

                /* end of message? deliver to socket, and we're done with
                 * the reassembly/response key
                 */
                if (!rc && flags & MCTP_HDR_FLAG_EOM) {
                        sock_queue_rcv_skb(key->sk, key->reasm_head);
                        key->reasm_head = NULL;
                        __mctp_key_unlock_drop(key, net, f);
                } else {
                        spin_unlock_irqrestore(&key->reasm_lock, f);
                }

        } else {
                /* not a start, no matching key */
                rc = -ENOENT;
        }

out_unlock:
        rcu_read_unlock();
out:
        if (rc)
                kfree_skb(skb);
        return rc;
}
377 
mctp_route_mtu(struct mctp_route * rt)378 static unsigned int mctp_route_mtu(struct mctp_route *rt)
379 {
380 	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
381 }
382 
/* Transmit a single, fully-framed MCTP packet on route->dev: resolve the
 * neighbour's physical address (if known), add the hardware header, and
 * hand the skb to the device queue. Consumes the skb in all cases.
 */
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
        struct mctp_hdr *hdr = mctp_hdr(skb);
        char daddr_buf[MAX_ADDR_LEN];
        char *daddr = NULL;
        unsigned int mtu;
        int rc;

        skb->protocol = htons(ETH_P_MCTP);

        mtu = READ_ONCE(skb->dev->mtu);
        if (skb->len > mtu) {
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /* If lookup fails let the device handle daddr==NULL */
        if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
                daddr = daddr_buf;

        rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
                             daddr, skb->dev->dev_addr, skb->len);
        if (rc < 0) {
                kfree_skb(skb);
                return -EHOSTUNREACH;
        }

        rc = dev_queue_xmit(skb);
        if (rc)
                rc = net_xmit_errno(rc);

        return rc;
}
416 
417 /* route alloc/release */
/* Drop a reference on @rt; on the final put, release the held device
 * reference and free the route after an RCU grace period.
 */
static void mctp_route_release(struct mctp_route *rt)
{
        if (refcount_dec_and_test(&rt->refs)) {
                dev_put(rt->dev->dev);
                kfree_rcu(rt, rcu);
        }
}
425 
/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
        struct mctp_route *rt;

        rt = kzalloc(sizeof(*rt), GFP_KERNEL);
        if (!rt)
                return NULL;

        INIT_LIST_HEAD(&rt->list);
        refcount_set(&rt->refs, 1);
        /* safe default until the caller installs a real output handler */
        rt->output = mctp_route_discard;

        return rt;
}
441 
/* Read the namespace's default MCTP network ID (lockless read, paired
 * with the WRITE_ONCE in mctp_default_net_set()).
 */
unsigned int mctp_default_net(struct net *net)
{
        return READ_ONCE(net->mctp.default_net);
}
446 
mctp_default_net_set(struct net * net,unsigned int index)447 int mctp_default_net_set(struct net *net, unsigned int index)
448 {
449 	if (index == 0)
450 		return -EINVAL;
451 	WRITE_ONCE(net->mctp.default_net, index);
452 	return 0;
453 }
454 
455 /* tag management */
/* Publish @key on both the net-wide and per-socket key lists, reserving
 * its tag. Caller must hold the namespace keys_lock.
 */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
                             struct mctp_sock *msk)
{
        struct netns_mctp *mns = &net->mctp;

        lockdep_assert_held(&mns->keys_lock);

        /* we hold the net->key_lock here, allowing updates to both
         * the net and sk
         */
        hlist_add_head_rcu(&key->hlist, &mns->keys);
        hlist_add_head_rcu(&key->sklist, &msk->keys);
}
469 
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
 * it for the socket msk
 *
 * Scans the existing keys under keys_lock for tags already in use on this
 * (saddr, daddr) pair and picks the lowest free one of the 8 possible
 * values. Returns 0 with *tagp set, -EAGAIN if all tags are in use, or
 * -ENOMEM.
 */
static int mctp_alloc_local_tag(struct mctp_sock *msk,
                                mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
        struct net *net = sock_net(&msk->sk);
        struct netns_mctp *mns = &net->mctp;
        struct mctp_sk_key *key, *tmp;
        unsigned long flags;
        int rc = -EAGAIN;
        u8 tagbits;

        /* for NULL destination EIDs, we may get a response from any peer */
        if (daddr == MCTP_ADDR_NULL)
                daddr = MCTP_ADDR_ANY;

        /* be optimistic, alloc now */
        key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
        if (!key)
                return -ENOMEM;

        /* 8 possible tag values */
        tagbits = 0xff;

        spin_lock_irqsave(&mns->keys_lock, flags);

        /* Walk through the existing keys, looking for potential conflicting
         * tags. If we find a conflict, clear that bit from tagbits
         */
        hlist_for_each_entry(tmp, &mns->keys, hlist) {
                /* if we don't own the tag, it can't conflict */
                if (tmp->tag & MCTP_HDR_FLAG_TO)
                        continue;

                if ((tmp->peer_addr == daddr ||
                     tmp->peer_addr == MCTP_ADDR_ANY) &&
                    tmp->local_addr == saddr)
                        tagbits &= ~(1 << tmp->tag);

                /* all tags taken: no point scanning further */
                if (!tagbits)
                        break;
        }

        if (tagbits) {
                /* lowest free tag */
                key->tag = __ffs(tagbits);
                mctp_reserve_tag(net, key, msk);
                *tagp = key->tag;
                rc = 0;
        }

        spin_unlock_irqrestore(&mns->keys_lock, flags);

        /* no tag found: the optimistic allocation goes unused */
        if (!tagbits)
                kfree(key);

        return rc;
}
528 
529 /* routing lookups */
/* Does @rt cover (net, eid)? READ_ONCE on rt->dev->net as the device's
 * network ID may be updated concurrently.
 */
static bool mctp_rt_match_eid(struct mctp_route *rt,
                              unsigned int net, mctp_eid_t eid)
{
        return READ_ONCE(rt->dev->net) == net &&
                rt->min <= eid && rt->max >= eid;
}
536 
/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
                                  struct mctp_route *rt2)
{
        /* route updates happen under RTNL */
        ASSERT_RTNL();
        return rt1->dev->net == rt2->dev->net &&
                rt1->min == rt2->min &&
                rt1->max == rt2->max;
}
546 
/* Find the first route covering (dnet, daddr), taking a reference on it.
 * Returns NULL if no route matches; the caller drops the reference via
 * mctp_do_route() or mctp_route_release().
 */
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
                                     mctp_eid_t daddr)
{
        struct mctp_route *tmp, *rt = NULL;

        rcu_read_lock();

        list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
                /* TODO: add metrics */
                if (mctp_rt_match_eid(tmp, dnet, daddr)) {
                        /* skip routes that are concurrently being freed */
                        if (refcount_inc_not_zero(&tmp->refs)) {
                                rt = tmp;
                                break;
                        }
                }
        }

        rcu_read_unlock();

        return rt;
}
568 
/* Find a local route for @dev, used for packets addressed to the NULL EID
 * but physically addressed to us. Takes a reference on the returned route;
 * returns NULL if none exists.
 */
static struct mctp_route *mctp_route_lookup_null(struct net *net,
                                                 struct net_device *dev)
{
        struct mctp_route *tmp, *rt = NULL;

        rcu_read_lock();

        list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
                if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
                    refcount_inc_not_zero(&tmp->refs)) {
                        rt = tmp;
                        break;
                }
        }

        rcu_read_unlock();

        return rt;
}
588 
/* sends a skb to rt and releases the route. */
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
{
        int rc;

        /* the output handler consumes the skb */
        rc = rt->output(rt, skb);
        mctp_route_release(rt);
        return rc;
}
598 
/* Fragment @skb (which carries a complete MCTP header) into MTU-sized
 * packets and transmit each via rt->output. SOM/EOM flags and the 2-bit
 * sequence counter are set per-fragment. Consumes @skb and releases @rt;
 * returns 0, or the first transmit/allocation error (already-sent
 * fragments are not unwound).
 */
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
                                  unsigned int mtu, u8 tag)
{
        const unsigned int hlen = sizeof(struct mctp_hdr);
        struct mctp_hdr *hdr, *hdr2;
        unsigned int pos, size;
        struct sk_buff *skb2;
        int rc;
        u8 seq;

        hdr = mctp_hdr(skb);
        seq = 0;
        rc = 0;

        /* need room for a header plus at least one payload byte */
        if (mtu < hlen + 1) {
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /* we've got the header */
        skb_pull(skb, hlen);

        for (pos = 0; pos < skb->len;) {
                /* size of message payload */
                size = min(mtu - hlen, skb->len - pos);

                skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
                if (!skb2) {
                        rc = -ENOMEM;
                        break;
                }

                /* generic skb copy */
                skb2->protocol = skb->protocol;
                skb2->priority = skb->priority;
                skb2->dev = skb->dev;
                memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);

                /* establish packet */
                skb_reserve(skb2, MCTP_HEADER_MAXLEN);
                skb_reset_network_header(skb2);
                skb_put(skb2, hlen + size);
                skb2->transport_header = skb2->network_header + hlen;

                /* copy header fields, calculate SOM/EOM flags & seq */
                hdr2 = mctp_hdr(skb2);
                hdr2->ver = hdr->ver;
                hdr2->dest = hdr->dest;
                hdr2->src = hdr->src;
                hdr2->flags_seq_tag = tag &
                        (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

                if (pos == 0)
                        hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

                if (pos + size == skb->len)
                        hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

                hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

                /* copy message payload */
                skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

                /* do route, but don't drop the rt reference */
                rc = rt->output(rt, skb2);
                if (rc)
                        break;

                seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
                pos += size;
        }

        mctp_route_release(rt);
        consume_skb(skb);
        return rc;
}
678 
/* Transmit a locally-originated message on @rt: pick a source address from
 * the outbound interface, allocate an owned tag if the caller requested
 * one (req_tag has the TO bit set), build the MCTP header, and either send
 * directly or fragment to the route MTU. Consumes @skb and releases @rt on
 * the send paths; returns 0 or a negative errno.
 */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
        struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
        struct mctp_skb_cb *cb = mctp_cb(skb);
        struct mctp_hdr *hdr;
        unsigned long flags;
        unsigned int mtu;
        mctp_eid_t saddr;
        int rc;
        u8 tag;

        if (WARN_ON(!rt->dev))
                return -EINVAL;

        spin_lock_irqsave(&rt->dev->addrs_lock, flags);
        if (rt->dev->num_addrs == 0) {
                rc = -EHOSTUNREACH;
        } else {
                /* use the outbound interface's first address as our source */
                saddr = rt->dev->addrs[0];
                rc = 0;
        }
        spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);

        if (rc)
                return rc;

        if (req_tag & MCTP_HDR_FLAG_TO) {
                /* request message: allocate a locally-owned tag */
                rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
                if (rc)
                        return rc;
                tag |= MCTP_HDR_FLAG_TO;
        } else {
                /* response: echo the caller-supplied tag */
                tag = req_tag;
        }

        skb->protocol = htons(ETH_P_MCTP);
        skb->priority = 0;
        skb_reset_transport_header(skb);
        skb_push(skb, sizeof(struct mctp_hdr));
        skb_reset_network_header(skb);
        skb->dev = rt->dev->dev;

        /* cb->net will have been set on initial ingress */
        cb->src = saddr;

        /* set up common header fields */
        hdr = mctp_hdr(skb);
        hdr->ver = 1;
        hdr->dest = daddr;
        hdr->src = saddr;

        mtu = mctp_route_mtu(rt);

        if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
                /* fits in one packet: single SOM|EOM frame */
                hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
                        tag;
                return mctp_do_route(rt, skb);
        } else {
                return mctp_do_fragment_route(rt, skb, mtu, tag);
        }
}
743 
744 /* route management */
/* Add a route for [daddr_start, daddr_start + daddr_extent] on @mdev.
 * @type selects the output handler: RTN_LOCAL delivers to local sockets,
 * RTN_UNICAST transmits on the device. Must be called under RTNL.
 * Returns -EEXIST for an identical existing route.
 */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
                          unsigned int daddr_extent, unsigned int mtu,
                          unsigned char type)
{
        int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *ert;

        if (!mctp_address_ok(daddr_start))
                return -EINVAL;

        /* range must stay within the valid EID space */
        if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
                return -EINVAL;

        switch (type) {
        case RTN_LOCAL:
                rtfn = mctp_route_input;
                break;
        case RTN_UNICAST:
                rtfn = mctp_route_output;
                break;
        default:
                return -EINVAL;
        }

        rt = mctp_route_alloc();
        if (!rt)
                return -ENOMEM;

        rt->min = daddr_start;
        rt->max = daddr_start + daddr_extent;
        rt->mtu = mtu;
        rt->dev = mdev;
        /* hold the device for the lifetime of the route */
        dev_hold(rt->dev->dev);
        rt->type = type;
        rt->output = rtfn;

        ASSERT_RTNL();
        /* Prevent duplicate identical routes. */
        list_for_each_entry(ert, &net->mctp.routes, list) {
                if (mctp_rt_compare_exact(rt, ert)) {
                        mctp_route_release(rt);
                        return -EEXIST;
                }
        }

        list_add_rcu(&rt->list, &net->mctp.routes);

        return 0;
}
795 
/* Remove all routes exactly matching (@mdev, range, @type). Must be called
 * under RTNL. Returns -ENOENT if nothing matched.
 */
static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
                             unsigned int daddr_extent, unsigned char type)
{
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *tmp;
        mctp_eid_t daddr_end;
        bool dropped;

        if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
                return -EINVAL;

        daddr_end = daddr_start + daddr_extent;
        dropped = false;

        ASSERT_RTNL();

        list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
                if (rt->dev == mdev &&
                    rt->min == daddr_start && rt->max == daddr_end &&
                    rt->type == type) {
                        list_del_rcu(&rt->list);
                        /* TODO: immediate RTM_DELROUTE */
                        mctp_route_release(rt);
                        dropped = true;
                }
        }

        return dropped ? 0 : -ENOENT;
}
825 
/* Add a single-EID local (deliver-to-socket) route for @addr on @mdev. */
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
        return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}
830 
/* Remove the single-EID local route for @addr on @mdev. */
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
        return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}
835 
/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *tmp;

        ASSERT_RTNL();
        list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
                if (rt->dev == mdev) {
                        list_del_rcu(&rt->list);
                        /* TODO: immediate RTM_DELROUTE */
                        mctp_route_release(rt);
                }
        }
}
851 
852 /* Incoming packet-handling */
853 
/* packet_type receive handler for ETH_P_MCTP: validate the frame, record
 * the ingress network in the skb cb, and hand off to the matching route
 * (falling back to a local route for NULL-EID packets addressed to our
 * hardware address). Consumes the skb.
 */
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
                                struct packet_type *pt,
                                struct net_device *orig_dev)
{
        struct net *net = dev_net(dev);
        struct mctp_skb_cb *cb;
        struct mctp_route *rt;
        struct mctp_hdr *mh;

        /* basic non-data sanity checks */
        if (dev->type != ARPHRD_MCTP)
                goto err_drop;

        if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
                goto err_drop;

        skb_reset_transport_header(skb);
        skb_reset_network_header(skb);

        /* We have enough for a header; decode and route */
        mh = mctp_hdr(skb);
        if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
                goto err_drop;

        cb = __mctp_cb(skb);
        rcu_read_lock();
        cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
        rcu_read_unlock();

        rt = mctp_route_lookup(net, cb->net, mh->dest);

        /* NULL EID, but addressed to our physical address */
        if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
                rt = mctp_route_lookup_null(net, dev);

        if (!rt)
                goto err_drop;

        /* mctp_do_route() consumes the skb and drops the route ref */
        mctp_do_route(rt, skb);

        return NET_RX_SUCCESS;

err_drop:
        kfree_skb(skb);
        return NET_RX_DROP;
}
900 
/* link-layer hook for inbound MCTP frames, registered in
 * mctp_routes_init()
 */
static struct packet_type mctp_packet_type = {
        .type = cpu_to_be16(ETH_P_MCTP),
        .func = mctp_pkttype_receive,
};
905 
906 /* netlink interface */
907 
/* netlink attribute policy for RTM_NEWROUTE / RTM_DELROUTE requests */
static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
        [RTA_DST]               = { .type = NLA_U8 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_OIF]               = { .type = NLA_U32 },
};
913 
/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 *
 * On success, fills *rtm, *mdev and *daddr_start from the request; sets
 * an extack message and returns a negative errno on any validation
 * failure. Runs under RTNL (uses __dev_get_by_index / mctp_dev_get_rtnl).
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack,
                              struct nlattr **tb, struct rtmsg **rtm,
                              struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
        struct net *net = sock_net(skb->sk);
        struct net_device *dev;
        unsigned int ifindex;
        int rc;

        rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
                         rta_mctp_policy, extack);
        if (rc < 0) {
                NL_SET_ERR_MSG(extack, "incorrect format");
                return rc;
        }

        if (!tb[RTA_DST]) {
                NL_SET_ERR_MSG(extack, "dst EID missing");
                return -EINVAL;
        }
        *daddr_start = nla_get_u8(tb[RTA_DST]);

        if (!tb[RTA_OIF]) {
                NL_SET_ERR_MSG(extack, "ifindex missing");
                return -EINVAL;
        }
        ifindex = nla_get_u32(tb[RTA_OIF]);

        *rtm = nlmsg_data(nlh);
        if ((*rtm)->rtm_family != AF_MCTP) {
                NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
                return -EINVAL;
        }

        dev = __dev_get_by_index(net, ifindex);
        if (!dev) {
                NL_SET_ERR_MSG(extack, "bad ifindex");
                return -ENODEV;
        }
        /* device exists but isn't MCTP-enabled */
        *mdev = mctp_dev_get_rtnl(dev);
        if (!*mdev)
                return -ENODEV;

        if (dev->flags & IFF_LOOPBACK) {
                NL_SET_ERR_MSG(extack, "no routes to loopback");
                return -EINVAL;
        }

        return 0;
}
968 
mctp_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)969 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
970 			 struct netlink_ext_ack *extack)
971 {
972 	struct nlattr *tb[RTA_MAX + 1];
973 	mctp_eid_t daddr_start;
974 	struct mctp_dev *mdev;
975 	struct rtmsg *rtm;
976 	unsigned int mtu;
977 	int rc;
978 
979 	rc = mctp_route_nlparse(skb, nlh, extack, tb,
980 				&rtm, &mdev, &daddr_start);
981 	if (rc < 0)
982 		return rc;
983 
984 	if (rtm->rtm_type != RTN_UNICAST) {
985 		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
986 		return -EINVAL;
987 	}
988 
989 	/* TODO: parse mtu from nlparse */
990 	mtu = 0;
991 
992 	if (rtm->rtm_type != RTN_UNICAST)
993 		return -EINVAL;
994 
995 	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
996 			    rtm->rtm_type);
997 	return rc;
998 }
999 
/* RTM_DELROUTE handler: parse the request and remove the matching unicast
 * route.
 */
static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
                         struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RTA_MAX + 1];
        mctp_eid_t daddr_start;
        struct mctp_dev *mdev;
        struct rtmsg *rtm;
        int rc;

        rc = mctp_route_nlparse(skb, nlh, extack, tb,
                                &rtm, &mdev, &daddr_start);
        if (rc < 0)
                return rc;

        /* we only have unicast routes */
        if (rtm->rtm_type != RTN_UNICAST)
                return -EINVAL;

        rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
        return rc;
}
1021 
/* Serialise one route into a netlink RTM message on @skb. Returns 0 on
 * success, or -EMSGSIZE (with the partial message cancelled) if the skb
 * has insufficient room.
 */
static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
                            u32 portid, u32 seq, int event, unsigned int flags)
{
        struct nlmsghdr *nlh;
        struct rtmsg *hdr;
        void *metrics;

        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
        if (!nlh)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->rtm_family = AF_MCTP;

        /* we use the _len fields as a number of EIDs, rather than
         * a number of bits in the address
         */
        hdr->rtm_dst_len = rt->max - rt->min;
        hdr->rtm_src_len = 0;
        hdr->rtm_tos = 0;
        hdr->rtm_table = RT_TABLE_DEFAULT;
        hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
        hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
        hdr->rtm_type = rt->type;

        if (nla_put_u8(skb, RTA_DST, rt->min))
                goto cancel;

        metrics = nla_nest_start_noflag(skb, RTA_METRICS);
        if (!metrics)
                goto cancel;

        /* only report an MTU metric when one is set on the route */
        if (rt->mtu) {
                if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
                        goto cancel;
        }

        nla_nest_end(skb, metrics);

        if (rt->dev) {
                if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
                        goto cancel;
        }

        /* TODO: conditional neighbour physaddr? */

        nlmsg_end(skb, nlh);

        return 0;

cancel:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1076 
/* RTM_GETROUTE dump handler: walk the route list under RCU and emit one
 * RTM_NEWROUTE record per route, resuming from cb->args[0] on subsequent
 * calls.
 */
static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct mctp_route *rt;
        int s_idx, idx;

        /* TODO: allow filtering on route data, possibly under
         * cb->strict_check
         */

        /* TODO: change to struct overlay */
        s_idx = cb->args[0];
        idx = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
                /* skip entries already dumped in a previous pass */
                if (idx++ < s_idx)
                        continue;
                if (mctp_fill_rtinfo(skb, rt,
                                     NETLINK_CB(cb->skb).portid,
                                     cb->nlh->nlmsg_seq,
                                     RTM_NEWROUTE, NLM_F_MULTI) < 0)
                        break;
        }

        rcu_read_unlock();
        cb->args[0] = idx;

        return skb->len;
}
1107 
1108 /* net namespace implementation */
/* Per-namespace init: empty route/bind/key lists, their locks, and the
 * initial default network ID.
 */
static int __net_init mctp_routes_net_init(struct net *net)
{
        struct netns_mctp *ns = &net->mctp;

        INIT_LIST_HEAD(&ns->routes);
        INIT_HLIST_HEAD(&ns->binds);
        mutex_init(&ns->bind_lock);
        INIT_HLIST_HEAD(&ns->keys);
        spin_lock_init(&ns->keys_lock);
        /* can only fail for an index of 0, which this constant is not */
        WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
        return 0;
}
1121 
/* Per-namespace teardown: drop the list's reference on each remaining
 * route. At this point no new lookups can occur for this namespace.
 */
static void __net_exit mctp_routes_net_exit(struct net *net)
{
        struct mctp_route *rt;

        rcu_read_lock();
        list_for_each_entry_rcu(rt, &net->mctp.routes, list)
                mctp_route_release(rt);
        rcu_read_unlock();
}
1131 
/* pernet registration for the MCTP routing state */
static struct pernet_operations mctp_net_ops = {
        .init = mctp_routes_net_init,
        .exit = mctp_routes_net_exit,
};
1136 
/* Module init for the routing core: register the link-layer packet hook,
 * the rtnetlink route handlers, and the pernet state.
 */
int __init mctp_routes_init(void)
{
        dev_add_pack(&mctp_packet_type);

        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
                             NULL, mctp_dump_rtinfo, 0);
        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
                             mctp_newroute, NULL, 0);
        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
                             mctp_delroute, NULL, 0);

        return register_pernet_subsys(&mctp_net_ops);
}
1150 
/* Module exit: unwind mctp_routes_init() in reverse order. */
void mctp_routes_exit(void)
{
        unregister_pernet_subsys(&mctp_net_ops);
        rtnl_unregister(PF_MCTP, RTM_DELROUTE);
        rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
        rtnl_unregister(PF_MCTP, RTM_GETROUTE);
        dev_remove_pack(&mctp_packet_type);
}
1159