// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;

/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
        kfree_skb(skb);
        return 0;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
        struct mctp_skb_cb *cb = mctp_cb(skb);
        struct mctp_hdr *mh;
        struct sock *sk;
        u8 type;

        WARN_ON(!rcu_read_lock_held());

        /* TODO: look up in skb->cb? */
        mh = mctp_hdr(skb);

        if (!skb_headlen(skb))
                return NULL;

        type = (*(u8 *)skb->data) & 0x7f;

        sk_for_each_rcu(sk, &net->mctp.binds) {
                struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

                if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
                        continue;

                if (msk->bind_type != type)
                        continue;

                if (msk->bind_addr != MCTP_ADDR_ANY &&
                    msk->bind_addr != mh->dest)
                        continue;

                return msk;
        }

        return NULL;
}
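
/* For example (illustrative only): a socket bound with bind_net ==
 * MCTP_NET_ANY and bind_addr == MCTP_ADDR_ANY receives every message of its
 * bind_type, on any network and any local EID; narrowing bind_net or
 * bind_addr restricts the match accordingly. Note that the 0x7f mask above
 * compares only the 7-bit message type, stripping what DSP0236 defines as
 * the integrity-check (IC) flag.
 */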

static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
                           mctp_eid_t peer, u8 tag)
{
        if (key->local_addr != local)
                return false;

        if (key->peer_addr != peer)
                return false;

        if (key->tag != tag)
                return false;

        return true;
}

static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
                                           mctp_eid_t peer)
{
        struct mctp_sk_key *key, *ret;
        struct mctp_hdr *mh;
        u8 tag;

        WARN_ON(!rcu_read_lock_held());

        mh = mctp_hdr(skb);
        tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

        ret = NULL;

        hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
                if (mctp_key_match(key, mh->dest, peer, tag)) {
                        ret = key;
                        break;
                }
        }

        return ret;
}
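
/* A key therefore identifies one in-flight message flow by the tuple
 * (local EID, peer EID, tag value + tag-owner bit). For instance, a request
 * sent from local EID 8 to peer EID 9 using a locally-allocated tag of 2
 * yields a key (8, 9, tag 2, TO clear), which then matches the incoming
 * response - the response carries the same tag with the TO flag unset. The
 * EIDs here are purely illustrative.
 */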

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
                                          mctp_eid_t local, mctp_eid_t peer,
                                          u8 tag, gfp_t gfp)
{
        struct mctp_sk_key *key;

        key = kzalloc(sizeof(*key), gfp);
        if (!key)
                return NULL;

        key->peer_addr = peer;
        key->local_addr = local;
        key->tag = tag;
        key->sk = &msk->sk;
        spin_lock_init(&key->reasm_lock);

        return key;
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
        struct net *net = sock_net(&msk->sk);
        struct mctp_sk_key *tmp;
        unsigned long flags;
        int rc = 0;

        spin_lock_irqsave(&net->mctp.keys_lock, flags);

        if (sock_flag(&msk->sk, SOCK_DEAD)) {
                rc = -EINVAL;
                goto out_unlock;
        }

        hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
                if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
                                   key->tag)) {
                        rc = -EEXIST;
                        break;
                }
        }

        if (!rc) {
                hlist_add_head(&key->hlist, &net->mctp.keys);
                hlist_add_head(&key->sklist, &msk->keys);
        }

out_unlock:
        spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

        return rc;
}

/* Must be called with key->reasm_lock held, which it will release. Will
 * schedule the key for an RCU free.
 */
static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
                                   unsigned long flags)
        __releases(&key->reasm_lock)
{
        struct sk_buff *skb;

        skb = key->reasm_head;
        key->reasm_head = NULL;
        key->reasm_dead = true;
        spin_unlock_irqrestore(&key->reasm_lock, flags);

        spin_lock_irqsave(&net->mctp.keys_lock, flags);
        hlist_del_rcu(&key->hlist);
        hlist_del_rcu(&key->sklist);
        spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
        kfree_rcu(key, rcu);

        if (skb)
                kfree_skb(skb);
}

static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
        struct mctp_hdr *hdr = mctp_hdr(skb);
        u8 exp_seq, this_seq;

        this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
                & MCTP_HDR_SEQ_MASK;

        if (!key->reasm_head) {
                key->reasm_head = skb;
                key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
                key->last_seq = this_seq;
                return 0;
        }

        exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

        if (this_seq != exp_seq)
                return -EINVAL;

        if (key->reasm_head->len + skb->len > mctp_message_maxlen)
                return -EINVAL;

        skb->next = NULL;
        skb->sk = NULL;
        *key->reasm_tailp = skb;
        key->reasm_tailp = &skb->next;

        key->last_seq = this_seq;

        key->reasm_head->data_len += skb->len;
        key->reasm_head->len += skb->len;
        key->reasm_head->truesize += skb->truesize;

        return 0;
}
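
/* An illustrative reassembly sequence, assuming the usual 2-bit MCTP
 * sequence field: a message arriving as fragments with seq 2, 3, 0, 1 is
 * queued in order, since each packet's seq equals (last_seq + 1) masked to
 * MCTP_HDR_SEQ_MASK. The first fragment may carry any seq value. A gap
 * (say, seq 2 followed by seq 0) fails the exp_seq check above, and the
 * offending packet is dropped by the caller.
 */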
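
/* Receive path for locally-terminated packets. Broadly: a SOM packet either
 * completes a single-packet message (SOM | EOM: deliver straight to the
 * socket) or opens a reassembly key; a non-SOM packet must match an
 * existing key, and is appended until EOM, at which point the assembled
 * message is queued to the owning socket and the key is dropped.
 */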
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct mctp_sk_key *key;
        struct mctp_sock *msk;
        struct mctp_hdr *mh;
        unsigned long f;
        u8 tag, flags;
        int rc;

        msk = NULL;
        rc = -EINVAL;

        /* we may be receiving a locally-routed packet; drop source sk
         * accounting
         */
        skb_orphan(skb);

        /* ensure we have enough data for a header and a type */
        if (skb->len < sizeof(struct mctp_hdr) + 1)
                goto out;

        /* grab header, advance data ptr */
        mh = mctp_hdr(skb);
        skb_pull(skb, sizeof(struct mctp_hdr));

        if (mh->ver != 1)
                goto out;

        flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
        tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

        rcu_read_lock();

        /* lookup socket / reasm context, exactly matching (src, dest, tag) */
        key = mctp_lookup_key(net, skb, mh->src);

        if (flags & MCTP_HDR_FLAG_SOM) {
                if (key) {
                        msk = container_of(key->sk, struct mctp_sock, sk);
                } else {
                        /* first response to a broadcast? do a more general
                         * key lookup to find the socket, but don't use this
                         * key for reassembly - we'll create a more specific
                         * one for future packets if required (ie, !EOM).
                         */
                        key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
                        if (key) {
                                msk = container_of(key->sk,
                                                   struct mctp_sock, sk);
                                key = NULL;
                        }
                }

                if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
                        msk = mctp_lookup_bind(net, skb);

                if (!msk) {
                        rc = -ENOENT;
                        goto out_unlock;
                }

                /* single-packet message? deliver to socket, clean up any
                 * pending key.
                 */
                if (flags & MCTP_HDR_FLAG_EOM) {
                        sock_queue_rcv_skb(&msk->sk, skb);
                        if (key) {
                                spin_lock_irqsave(&key->reasm_lock, f);
                                /* we've hit a pending reassembly; not much we
                                 * can do but drop it
                                 */
                                __mctp_key_unlock_drop(key, net, f);
                        }
                        rc = 0;
                        goto out_unlock;
                }

                /* broadcast response or a bind() - create a key for further
                 * packets for this message
                 */
                if (!key) {
                        key = mctp_key_alloc(msk, mh->dest, mh->src,
                                             tag, GFP_ATOMIC);
                        if (!key) {
                                rc = -ENOMEM;
                                goto out_unlock;
                        }

                        /* we can queue without the reasm lock here, as the
                         * key isn't observable yet
                         */
                        mctp_frag_queue(key, skb);

                        /* if the key_add fails, we've raced with another
                         * SOM packet with the same src, dest and tag. There's
                         * no way to distinguish future packets, so all we
                         * can do is drop; we'll free the skb on exit from
                         * this function.
                         */
                        rc = mctp_key_add(key, msk);
                        if (rc)
                                kfree(key);

                } else {
                        /* existing key: start reassembly */
                        spin_lock_irqsave(&key->reasm_lock, f);

                        if (key->reasm_head || key->reasm_dead) {
                                /* duplicate start? drop everything */
                                __mctp_key_unlock_drop(key, net, f);
                                rc = -EEXIST;
                        } else {
                                rc = mctp_frag_queue(key, skb);
                                spin_unlock_irqrestore(&key->reasm_lock, f);
                        }
                }

        } else if (key) {
                /* this packet continues a previous message; reassemble
                 * using the message-specific key
                 */

                spin_lock_irqsave(&key->reasm_lock, f);

                /* we need to be continuing an existing reassembly... */
                if (!key->reasm_head)
                        rc = -EINVAL;
                else
                        rc = mctp_frag_queue(key, skb);

                /* end of message? deliver to socket, and we're done with
                 * the reassembly/response key
                 */
                if (!rc && flags & MCTP_HDR_FLAG_EOM) {
                        sock_queue_rcv_skb(key->sk, key->reasm_head);
                        key->reasm_head = NULL;
                        __mctp_key_unlock_drop(key, net, f);
                } else {
                        spin_unlock_irqrestore(&key->reasm_lock, f);
                }

        } else {
                /* not a start, no matching key */
                rc = -ENOENT;
        }

out_unlock:
        rcu_read_unlock();
out:
        if (rc)
                kfree_skb(skb);
        return rc;
}

static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
        return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}

static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
        struct mctp_hdr *hdr = mctp_hdr(skb);
        char daddr_buf[MAX_ADDR_LEN];
        char *daddr = NULL;
        unsigned int mtu;
        int rc;

        skb->protocol = htons(ETH_P_MCTP);

        mtu = READ_ONCE(skb->dev->mtu);
        if (skb->len > mtu) {
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /* If the lookup fails, let the device handle daddr == NULL */
        if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
                daddr = daddr_buf;

        rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
                             daddr, skb->dev->dev_addr, skb->len);
        if (rc < 0) {
                kfree_skb(skb);
                return -EHOSTUNREACH;
        }

        rc = dev_queue_xmit(skb);
        if (rc)
                rc = net_xmit_errno(rc);

        return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
        if (refcount_dec_and_test(&rt->refs)) {
                dev_put(rt->dev->dev);
                kfree_rcu(rt, rcu);
        }
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
        struct mctp_route *rt;

        rt = kzalloc(sizeof(*rt), GFP_KERNEL);
        if (!rt)
                return NULL;

        INIT_LIST_HEAD(&rt->list);
        refcount_set(&rt->refs, 1);
        rt->output = mctp_route_discard;

        return rt;
}

unsigned int mctp_default_net(struct net *net)
{
        return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
        if (index == 0)
                return -EINVAL;
        WRITE_ONCE(net->mctp.default_net, index);
        return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
                             struct mctp_sock *msk)
{
        struct netns_mctp *mns = &net->mctp;

        lockdep_assert_held(&mns->keys_lock);

        /* we hold keys_lock here, allowing updates to both the net and
         * socket lists
         */
        hlist_add_head_rcu(&key->hlist, &mns->keys);
        hlist_add_head_rcu(&key->sklist, &msk->keys);
}

/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
 * it for the socket msk
 */
static int mctp_alloc_local_tag(struct mctp_sock *msk,
                                mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
        struct net *net = sock_net(&msk->sk);
        struct netns_mctp *mns = &net->mctp;
        struct mctp_sk_key *key, *tmp;
        unsigned long flags;
        int rc = -EAGAIN;
        u8 tagbits;

        /* for NULL destination EIDs, we may get a response from any peer */
        if (daddr == MCTP_ADDR_NULL)
                daddr = MCTP_ADDR_ANY;

        /* be optimistic, alloc now */
        key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
        if (!key)
                return -ENOMEM;

        /* 8 possible tag values */
        tagbits = 0xff;

        spin_lock_irqsave(&mns->keys_lock, flags);

        /* Walk through the existing keys, looking for potential conflicting
         * tags. If we find a conflict, clear that bit from tagbits
         */
        hlist_for_each_entry(tmp, &mns->keys, hlist) {
                /* if we don't own the tag, it can't conflict */
                if (tmp->tag & MCTP_HDR_FLAG_TO)
                        continue;

                if ((tmp->peer_addr == daddr ||
                     tmp->peer_addr == MCTP_ADDR_ANY) &&
                    tmp->local_addr == saddr)
                        tagbits &= ~(1 << tmp->tag);

                if (!tagbits)
                        break;
        }

        if (tagbits) {
                key->tag = __ffs(tagbits);
                mctp_reserve_tag(net, key, msk);
                *tagp = key->tag;
                rc = 0;
        }

        spin_unlock_irqrestore(&mns->keys_lock, flags);

        if (!tagbits)
                kfree(key);

        return rc;
}
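
/* A worked example of the allocation above, with illustrative values: if
 * existing keys already reserve tags 0 and 1 for the same (saddr, daddr)
 * pair, tagbits ends up as 0xfc, and __ffs(0xfc) selects tag 2. Once all
 * eight 3-bit tag values are in use for a given peer, the function returns
 * -EAGAIN until an existing key is released.
 */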

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
                              unsigned int net, mctp_eid_t eid)
{
        return READ_ONCE(rt->dev->net) == net &&
                rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
                                  struct mctp_route *rt2)
{
        ASSERT_RTNL();
        return rt1->dev->net == rt2->dev->net &&
                rt1->min == rt2->min &&
                rt1->max == rt2->max;
}

struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
                                     mctp_eid_t daddr)
{
        struct mctp_route *tmp, *rt = NULL;

        rcu_read_lock();

        list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
                /* TODO: add metrics */
                if (mctp_rt_match_eid(tmp, dnet, daddr)) {
                        if (refcount_inc_not_zero(&tmp->refs)) {
                                rt = tmp;
                                break;
                        }
                }
        }

        rcu_read_unlock();

        return rt;
}
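
/* Sketch of typical caller usage (illustrative; "netid" and "eid" are just
 * placeholder variables): a successful lookup returns a route with an
 * elevated refcount, which mctp_do_route() below later drops:
 *
 *	rt = mctp_route_lookup(net, netid, eid);
 *	if (rt)
 *		mctp_do_route(rt, skb);	(consumes both the rt ref and skb)
 */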

static struct mctp_route *mctp_route_lookup_null(struct net *net,
                                                 struct net_device *dev)
{
        struct mctp_route *tmp, *rt = NULL;

        rcu_read_lock();

        list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
                if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
                    refcount_inc_not_zero(&tmp->refs)) {
                        rt = tmp;
                        break;
                }
        }

        rcu_read_unlock();

        return rt;
}

/* sends a skb to rt and releases the route. */
int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
{
        int rc;

        rc = rt->output(rt, skb);
        mctp_route_release(rt);
        return rc;
}

static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
                                  unsigned int mtu, u8 tag)
{
        const unsigned int hlen = sizeof(struct mctp_hdr);
        struct mctp_hdr *hdr, *hdr2;
        unsigned int pos, size;
        struct sk_buff *skb2;
        int rc;
        u8 seq;

        hdr = mctp_hdr(skb);
        seq = 0;
        rc = 0;

        if (mtu < hlen + 1) {
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        /* we've got the header */
        skb_pull(skb, hlen);

        for (pos = 0; pos < skb->len;) {
                /* size of message payload */
                size = min(mtu - hlen, skb->len - pos);

                skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
                if (!skb2) {
                        rc = -ENOMEM;
                        break;
                }

                /* generic skb copy */
                skb2->protocol = skb->protocol;
                skb2->priority = skb->priority;
                skb2->dev = skb->dev;
                memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);

                /* establish packet */
                skb_reserve(skb2, MCTP_HEADER_MAXLEN);
                skb_reset_network_header(skb2);
                skb_put(skb2, hlen + size);
                skb2->transport_header = skb2->network_header + hlen;

                /* copy header fields, calculate SOM/EOM flags & seq */
                hdr2 = mctp_hdr(skb2);
                hdr2->ver = hdr->ver;
                hdr2->dest = hdr->dest;
                hdr2->src = hdr->src;
                hdr2->flags_seq_tag = tag &
                        (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

                if (pos == 0)
                        hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

                if (pos + size == skb->len)
                        hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

                hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

                /* copy message payload */
                skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

                /* do route, but don't drop the rt reference */
                rc = rt->output(rt, skb2);
                if (rc)
                        break;

                seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
                pos += size;
        }

        mctp_route_release(rt);
        consume_skb(skb);
        return rc;
}
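
/* Fragmentation arithmetic, with illustrative numbers and assuming the
 * 4-byte MCTP packet header: with an mtu of 68, each fragment carries up to
 * 64 payload bytes, so a 200-byte message becomes four packets of 64, 64,
 * 64 and 8 payload bytes, with SOM set on the first, EOM on the last, and
 * seq incrementing 0, 1, 2, 3.
 */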

int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
        struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
        struct mctp_skb_cb *cb = mctp_cb(skb);
        struct mctp_hdr *hdr;
        unsigned long flags;
        unsigned int mtu;
        mctp_eid_t saddr;
        int rc;
        u8 tag;

        if (WARN_ON(!rt->dev))
                return -EINVAL;

        spin_lock_irqsave(&rt->dev->addrs_lock, flags);
        if (rt->dev->num_addrs == 0) {
                rc = -EHOSTUNREACH;
        } else {
                /* use the outbound interface's first address as our source */
                saddr = rt->dev->addrs[0];
                rc = 0;
        }
        spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);

        if (rc)
                return rc;

        if (req_tag & MCTP_HDR_FLAG_TO) {
                rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
                if (rc)
                        return rc;
                tag |= MCTP_HDR_FLAG_TO;
        } else {
                tag = req_tag;
        }

        skb->protocol = htons(ETH_P_MCTP);
        skb->priority = 0;
        skb_reset_transport_header(skb);
        skb_push(skb, sizeof(struct mctp_hdr));
        skb_reset_network_header(skb);
        skb->dev = rt->dev->dev;

        /* cb->net will have been set on initial ingress */
        cb->src = saddr;

        /* set up common header fields */
        hdr = mctp_hdr(skb);
        hdr->ver = 1;
        hdr->dest = daddr;
        hdr->src = saddr;

        mtu = mctp_route_mtu(rt);

        if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
                hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
                        tag;
                return mctp_do_route(rt, skb);
        } else {
                return mctp_do_fragment_route(rt, skb, mtu, tag);
        }
}
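
/* For a single-packet message, the header built above ends up as, for
 * example (illustrative values, usual DSP0236 bit layout assumed): ver 1,
 * dest 9, src 8, and flags_seq_tag carrying SOM | EOM, the TO flag, and the
 * allocated 3-bit tag. Larger messages take the fragmentation path instead.
 */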

/* route management */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
                          unsigned int daddr_extent, unsigned int mtu,
                          unsigned char type)
{
        int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *ert;

        if (!mctp_address_ok(daddr_start))
                return -EINVAL;

        if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
                return -EINVAL;

        switch (type) {
        case RTN_LOCAL:
                rtfn = mctp_route_input;
                break;
        case RTN_UNICAST:
                rtfn = mctp_route_output;
                break;
        default:
                return -EINVAL;
        }

        rt = mctp_route_alloc();
        if (!rt)
                return -ENOMEM;

        rt->min = daddr_start;
        rt->max = daddr_start + daddr_extent;
        rt->mtu = mtu;
        rt->dev = mdev;
        dev_hold(rt->dev->dev);
        rt->type = type;
        rt->output = rtfn;

        ASSERT_RTNL();
        /* Prevent duplicate identical routes. */
        list_for_each_entry(ert, &net->mctp.routes, list) {
                if (mctp_rt_compare_exact(rt, ert)) {
                        mctp_route_release(rt);
                        return -EEXIST;
                }
        }

        list_add_rcu(&rt->list, &net->mctp.routes);

        return 0;
}
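
/* Route ranges are inclusive: daddr_start 8 with daddr_extent 3, for
 * instance, installs a route covering EIDs 8 through 11. An extent of 0
 * (as used by mctp_route_add_local() below) covers a single EID.
 */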

static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
                             unsigned int daddr_extent, unsigned char type)
{
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *tmp;
        mctp_eid_t daddr_end;
        bool dropped;

        if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
                return -EINVAL;

        daddr_end = daddr_start + daddr_extent;
        dropped = false;

        ASSERT_RTNL();

        list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
                if (rt->dev == mdev &&
                    rt->min == daddr_start && rt->max == daddr_end &&
                    rt->type == type) {
                        list_del_rcu(&rt->list);
                        /* TODO: immediate RTM_DELROUTE */
                        mctp_route_release(rt);
                        dropped = true;
                }
        }

        return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
        return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
        return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *tmp;

        ASSERT_RTNL();
        list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
                if (rt->dev == mdev) {
                        list_del_rcu(&rt->list);
                        /* TODO: immediate RTM_DELROUTE */
                        mctp_route_release(rt);
                }
        }
}

/* Incoming packet-handling */

static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
                                struct packet_type *pt,
                                struct net_device *orig_dev)
{
        struct net *net = dev_net(dev);
        struct mctp_skb_cb *cb;
        struct mctp_route *rt;
        struct mctp_hdr *mh;

        /* basic non-data sanity checks */
        if (dev->type != ARPHRD_MCTP)
                goto err_drop;

        if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
                goto err_drop;

        skb_reset_transport_header(skb);
        skb_reset_network_header(skb);

        /* We have enough for a header; decode and route */
        mh = mctp_hdr(skb);
        if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
                goto err_drop;

        cb = __mctp_cb(skb);
        rcu_read_lock();
        cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
        rcu_read_unlock();

        rt = mctp_route_lookup(net, cb->net, mh->dest);

        /* NULL EID, but addressed to our physical address */
        if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
                rt = mctp_route_lookup_null(net, dev);

        if (!rt)
                goto err_drop;

        mctp_do_route(rt, skb);

        return NET_RX_SUCCESS;

err_drop:
        kfree_skb(skb);
        return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
        .type = cpu_to_be16(ETH_P_MCTP),
        .func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
        [RTA_DST]               = { .type = NLA_U8 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_OIF]               = { .type = NLA_U32 },
};

/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack,
                              struct nlattr **tb, struct rtmsg **rtm,
                              struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
        struct net *net = sock_net(skb->sk);
        struct net_device *dev;
        unsigned int ifindex;
        int rc;

        rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
                         rta_mctp_policy, extack);
        if (rc < 0) {
                NL_SET_ERR_MSG(extack, "incorrect format");
                return rc;
        }

        if (!tb[RTA_DST]) {
                NL_SET_ERR_MSG(extack, "dst EID missing");
                return -EINVAL;
        }
        *daddr_start = nla_get_u8(tb[RTA_DST]);

        if (!tb[RTA_OIF]) {
                NL_SET_ERR_MSG(extack, "ifindex missing");
                return -EINVAL;
        }
        ifindex = nla_get_u32(tb[RTA_OIF]);

        *rtm = nlmsg_data(nlh);
        if ((*rtm)->rtm_family != AF_MCTP) {
                NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
                return -EINVAL;
        }

        dev = __dev_get_by_index(net, ifindex);
        if (!dev) {
                NL_SET_ERR_MSG(extack, "bad ifindex");
                return -ENODEV;
        }
        *mdev = mctp_dev_get_rtnl(dev);
        if (!*mdev)
                return -ENODEV;

        if (dev->flags & IFF_LOOPBACK) {
                NL_SET_ERR_MSG(extack, "no routes to loopback");
                return -EINVAL;
        }

        return 0;
}

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
                         struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RTA_MAX + 1];
        mctp_eid_t daddr_start;
        struct mctp_dev *mdev;
        struct rtmsg *rtm;
        unsigned int mtu;
        int rc;

        rc = mctp_route_nlparse(skb, nlh, extack, tb,
                                &rtm, &mdev, &daddr_start);
        if (rc < 0)
                return rc;

        if (rtm->rtm_type != RTN_UNICAST) {
                NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
                return -EINVAL;
        }

        /* TODO: parse mtu from nlparse */
        mtu = 0;

        rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
                            rtm->rtm_type);
        return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
                         struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RTA_MAX + 1];
        mctp_eid_t daddr_start;
        struct mctp_dev *mdev;
        struct rtmsg *rtm;
        int rc;

        rc = mctp_route_nlparse(skb, nlh, extack, tb,
                                &rtm, &mdev, &daddr_start);
        if (rc < 0)
                return rc;

        /* we only have unicast routes */
        if (rtm->rtm_type != RTN_UNICAST)
                return -EINVAL;

        rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
        return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
                            u32 portid, u32 seq, int event, unsigned int flags)
{
        struct nlmsghdr *nlh;
        struct rtmsg *hdr;
        void *metrics;

        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
        if (!nlh)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->rtm_family = AF_MCTP;

        /* we use the _len fields as a number of EIDs, rather than
         * a number of bits in the address
         */
        hdr->rtm_dst_len = rt->max - rt->min;
        hdr->rtm_src_len = 0;
        hdr->rtm_tos = 0;
        hdr->rtm_table = RT_TABLE_DEFAULT;
        hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
        hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
        hdr->rtm_type = rt->type;

        if (nla_put_u8(skb, RTA_DST, rt->min))
                goto cancel;

        metrics = nla_nest_start_noflag(skb, RTA_METRICS);
        if (!metrics)
                goto cancel;

        if (rt->mtu) {
                if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
                        goto cancel;
        }

        nla_nest_end(skb, metrics);

        if (rt->dev) {
                if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
                        goto cancel;
        }

        /* TODO: conditional neighbour physaddr? */

        nlmsg_end(skb, nlh);

        return 0;

cancel:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
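
/* As noted above, rtm_dst_len carries an EID extent, not a prefix length:
 * a dump of a route covering EIDs 8 through 11, for example, reports
 * RTA_DST == 8 with rtm_dst_len == 3. The same convention applies on input
 * in mctp_newroute() and mctp_delroute() above.
 */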

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct mctp_route *rt;
        int s_idx, idx;

        /* TODO: allow filtering on route data, possibly under
         * cb->strict_check
         */

        /* TODO: change to struct overlay */
        s_idx = cb->args[0];
        idx = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
                if (idx++ < s_idx)
                        continue;
                if (mctp_fill_rtinfo(skb, rt,
                                     NETLINK_CB(cb->skb).portid,
                                     cb->nlh->nlmsg_seq,
                                     RTM_NEWROUTE, NLM_F_MULTI) < 0)
                        break;
        }

        rcu_read_unlock();
        cb->args[0] = idx;

        return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
        struct netns_mctp *ns = &net->mctp;

        INIT_LIST_HEAD(&ns->routes);
        INIT_HLIST_HEAD(&ns->binds);
        mutex_init(&ns->bind_lock);
        INIT_HLIST_HEAD(&ns->keys);
        spin_lock_init(&ns->keys_lock);
        WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
        return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
        struct mctp_route *rt;

        rcu_read_lock();
        list_for_each_entry_rcu(rt, &net->mctp.routes, list)
                mctp_route_release(rt);
        rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
        .init = mctp_routes_net_init,
        .exit = mctp_routes_net_exit,
};

int __init mctp_routes_init(void)
{
        dev_add_pack(&mctp_packet_type);

        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
                             NULL, mctp_dump_rtinfo, 0);
        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
                             mctp_newroute, NULL, 0);
        rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
                             mctp_delroute, NULL, 0);

        return register_pernet_subsys(&mctp_net_ops);
}

void mctp_routes_exit(void)
{
        unregister_pernet_subsys(&mctp_net_ops);
        rtnl_unregister(PF_MCTP, RTM_DELROUTE);
        rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
        rtnl_unregister(PF_MCTP, RTM_GETROUTE);
        dev_remove_pack(&mctp_packet_type);
}