1 /*
2 * libwebsockets - small server side websockets and web server implementation
3 *
4 * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 * We mainly focus on the routing table / gateways because those are the
25 * elements that decide if we can get on to the internet or not.
26 *
27 * We also need to understand the source addresses of possible outgoing routes,
28 * and follow LINK down (ifconfig down) to clean up routes on the interface idx
29 * going down that are not otherwise cleaned.
30 */
31
32 #include <private-lib-core.h>
33
34 #include <asm/types.h>
35 #include <sys/socket.h>
36 #include <linux/netlink.h>
37 #include <linux/rtnetlink.h>
38
/* work around CentOS 7 -Wconversion problem */
#undef RTA_ALIGNTO
#define RTA_ALIGNTO 4U

/*
 * All the per-message netlink tracing in this file goes through
 * lwsl_cx_netlink(); it is mapped onto the context-scoped info log level
 * below.
 */
//#define lwsl_netlink lwsl_notice
#define lwsl_cx_netlink lwsl_cx_info
45
46 static void
lws_netlink_coldplug_done_cb(lws_sorted_usec_list_t * sul)47 lws_netlink_coldplug_done_cb(lws_sorted_usec_list_t *sul)
48 {
49 struct lws_context *ctx = lws_container_of(sul, struct lws_context,
50 sul_nl_coldplug);
51 ctx->nl_initial_done = 1;
52 #if defined(LWS_WITH_SYS_STATE)
53 /* if nothing is there to intercept anything, go all the way */
54 lws_state_transition_steps(&ctx->mgr_system, LWS_SYSTATE_OPERATIONAL);
55 #endif
56 }
57
58 static int
rops_handle_POLLIN_netlink(struct lws_context_per_thread * pt,struct lws * wsi,struct lws_pollfd * pollfd)59 rops_handle_POLLIN_netlink(struct lws_context_per_thread *pt, struct lws *wsi,
60 struct lws_pollfd *pollfd)
61 {
62 struct lws_context *cx = pt->context;
63 uint8_t s[4096]
64 #if defined(_DEBUG)
65 , route_change = 0
66 #endif
67 #if defined(LWS_WITH_SYS_SMD)
68 , gateway_change = 0
69 #endif
70 ;
71 struct sockaddr_nl nladdr;
72 lws_route_t robj, *rou, *rmat;
73 struct nlmsghdr *h;
74 struct msghdr msg;
75 struct iovec iov;
76 unsigned int n;
77 char buf[72];
78
79 if (!(pollfd->revents & LWS_POLLIN))
80 return LWS_HPI_RET_HANDLED;
81
82 memset(&msg, 0, sizeof(msg));
83
84 iov.iov_base = (void *)s;
85 iov.iov_len = sizeof(s);
86
87 msg.msg_name = (void *)&(nladdr);
88 msg.msg_namelen = sizeof(nladdr);
89
90 msg.msg_iov = &iov;
91 msg.msg_iovlen = 1;
92
93 n = (unsigned int)recvmsg(wsi->desc.sockfd, &msg, 0);
94 if ((int)n < 0) {
95 lwsl_cx_notice(cx, "recvmsg failed");
96 return LWS_HPI_RET_PLEASE_CLOSE_ME;
97 }
98
99 // lwsl_hexdump_notice(s, (size_t)n);
100
101 h = (struct nlmsghdr *)s;
102
103 /* we can get a bunch of messages coalesced in one read*/
104
105 for ( ; NLMSG_OK(h, n); h = NLMSG_NEXT(h, n)) {
106 struct ifaddrmsg *ifam;
107 struct rtattr *ra;
108 struct rtmsg *rm;
109 #if !defined(LWS_WITH_NO_LOGS) && defined(_DEBUG)
110 struct ndmsg *nd;
111 #endif
112 unsigned int ra_len;
113 uint8_t *p;
114
115 struct ifinfomsg *ifi;
116 struct rtattr *attribute;
117 unsigned int len;
118
119 lwsl_cx_netlink(cx, "RTM %d", h->nlmsg_type);
120
121 memset(&robj, 0, sizeof(robj));
122 robj.if_idx = -1;
123 robj.priority = -1;
124 rm = (struct rtmsg *)NLMSG_DATA(h);
125
126 /*
127 * We have to care about NEWLINK so we can understand when a
128 * network interface went down, and clear the related routes.
129 *
130 * We don't get individual DELROUTEs for these.
131 */
132
133 switch (h->nlmsg_type) {
134 case RTM_NEWLINK:
135
136 ifi = NLMSG_DATA(h);
137 len = (unsigned int)(h->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
138
139 /* loop over all attributes for the NEWLINK message */
140 for (attribute = IFLA_RTA(ifi); RTA_OK(attribute, len);
141 attribute = RTA_NEXT(attribute, len)) {
142 lwsl_cx_netlink(cx, "if attr %d",
143 (int)attribute->rta_type);
144 switch(attribute->rta_type) {
145 case IFLA_IFNAME:
146 lwsl_cx_netlink(cx, "NETLINK ifidx %d : %s",
147 ifi->ifi_index,
148 (char *)RTA_DATA(attribute));
149 break;
150 default:
151 break;
152 } /* switch */
153 } /* for loop */
154
155 lwsl_cx_netlink(cx, "NEWLINK ifi_index %d, flags 0x%x",
156 ifi->ifi_index, ifi->ifi_flags);
157
158 /*
159 * Despite "New"link this is actually telling us there
160 * is some change on the network interface IFF_ state
161 */
162
163 if (!(ifi->ifi_flags & IFF_UP)) {
164 /*
165 * Interface is down, so scrub all routes that
166 * applied to it
167 */
168 lwsl_cx_netlink(cx, "NEWLINK: ifdown %d",
169 ifi->ifi_index);
170 lws_pt_lock(pt, __func__);
171 _lws_route_table_ifdown(pt, ifi->ifi_index);
172 lws_pt_unlock(pt);
173 }
174 continue; /* ie, not break, no second half */
175
176 case RTM_NEWADDR:
177 case RTM_DELADDR:
178
179 ifam = (struct ifaddrmsg *)NLMSG_DATA(h);
180
181 robj.source_ads = 1;
182 robj.dest_len = ifam->ifa_prefixlen;
183 robj.if_idx = (int)ifam->ifa_index;
184 robj.scope = ifam->ifa_scope;
185 robj.ifa_flags = ifam->ifa_flags;
186 robj.dest.sa4.sin_family = ifam->ifa_family;
187
188 /* address attributes */
189 ra = (struct rtattr *)IFA_RTA(ifam);
190 ra_len = (unsigned int)IFA_PAYLOAD(h);
191
192 lwsl_cx_netlink(cx, "%s",
193 h->nlmsg_type == RTM_NEWADDR ?
194 "NEWADDR" : "DELADDR");
195
196 /*
197 * almost nothing interesting within IFA_* attributes:
198 * so skip it and goto to the second half
199 */
200 goto second_half;
201
202 case RTM_NEWROUTE:
203 case RTM_DELROUTE:
204
205 lwsl_cx_netlink(cx, "%s",
206 h->nlmsg_type == RTM_NEWROUTE ?
207 "NEWROUTE" : "DELROUTE");
208
209 /* route attributes */
210 ra = (struct rtattr *)RTM_RTA(rm);
211 ra_len = (unsigned int)RTM_PAYLOAD(h);
212 break;
213
214 case RTM_DELNEIGH:
215 case RTM_NEWNEIGH:
216 lwsl_cx_netlink(cx, "%s", h->nlmsg_type ==
217 RTM_NEWNEIGH ? "NEWNEIGH" :
218 "DELNEIGH");
219 #if !defined(LWS_WITH_NO_LOGS) && defined(_DEBUG)
220 nd = (struct ndmsg *)rm;
221 lwsl_cx_netlink(cx, "fam %u, ifidx %u, flags 0x%x",
222 nd->ndm_family, nd->ndm_ifindex,
223 nd->ndm_flags);
224 #endif
225 ra = (struct rtattr *)RTM_RTA(rm);
226 ra_len = (unsigned int)RTM_PAYLOAD(h);
227 for ( ; RTA_OK(ra, ra_len); ra = RTA_NEXT(ra, ra_len)) {
228 lwsl_cx_netlink(cx, "atr %d", ra->rta_type);
229 switch (ra->rta_type) {
230 case NDA_DST:
231 lwsl_cx_netlink(cx, "dst len %d",
232 ra->rta_len);
233 break;
234 }
235 }
236 lws_pt_lock(pt, __func__);
237 _lws_route_pt_close_unroutable(pt);
238 lws_pt_unlock(pt);
239 continue;
240
241 default:
242 lwsl_cx_netlink(cx, "*** Unknown RTM_%d",
243 h->nlmsg_type);
244 continue;
245 } /* switch */
246
247 robj.proto = rm->rtm_protocol;
248
249 // iterate over route attributes
250 for ( ; RTA_OK(ra, ra_len); ra = RTA_NEXT(ra, ra_len)) {
251 // lwsl_netlink("%s: atr %d\n", __func__, ra->rta_type);
252 switch (ra->rta_type) {
253 case RTA_PREFSRC: /* protocol ads: preferred src ads */
254 case RTA_SRC:
255 lws_sa46_copy_address(&robj.src, RTA_DATA(ra),
256 rm->rtm_family);
257 robj.src_len = rm->rtm_src_len;
258 lws_sa46_write_numeric_address(&robj.src, buf, sizeof(buf));
259 lwsl_cx_netlink(cx, "RTA_SRC: %s", buf);
260 break;
261 case RTA_DST:
262 lws_sa46_copy_address(&robj.dest, RTA_DATA(ra),
263 rm->rtm_family);
264 robj.dest_len = rm->rtm_dst_len;
265 lws_sa46_write_numeric_address(&robj.dest, buf, sizeof(buf));
266 lwsl_cx_netlink(cx, "RTA_DST: %s", buf);
267 break;
268 case RTA_GATEWAY:
269 lws_sa46_copy_address(&robj.gateway,
270 RTA_DATA(ra),
271 rm->rtm_family);
272 #if defined(LWS_WITH_SYS_SMD)
273 gateway_change = 1;
274 #endif
275 break;
276 case RTA_IIF: /* int: input interface index */
277 case RTA_OIF: /* int: output interface index */
278 robj.if_idx = *(int *)RTA_DATA(ra);
279 lwsl_cx_netlink(cx, "ifidx %d", robj.if_idx);
280 break;
281 case RTA_PRIORITY: /* int: priority of route */
282 p = RTA_DATA(ra);
283 robj.priority = p[3] << 24 | p[2] << 16 |
284 p[1] << 8 | p[0];
285 break;
286 case RTA_CACHEINFO: /* struct rta_cacheinfo */
287 break;
288 #if defined(LWS_HAVE_RTA_PREF)
289 case RTA_PREF: /* char: RFC4191 v6 router preference */
290 break;
291 #endif
292 case RTA_TABLE: /* int */
293 break;
294
295 default:
296 lwsl_cx_info(cx, "unknown attr type %d",
297 ra->rta_type);
298 break;
299 }
300 } /* for */
301
302 /*
303 * the second half, once all the attributes were collected
304 */
305 second_half:
306 switch (h->nlmsg_type) {
307
308 case RTM_DELROUTE:
309 /*
310 * This will also take down wsi marked as using it
311 */
312 lwsl_cx_netlink(cx, "DELROUTE: if_idx %d",
313 robj.if_idx);
314 lws_pt_lock(pt, __func__);
315 _lws_route_remove(pt, &robj, 0);
316 lws_pt_unlock(pt);
317 goto inform;
318
319 case RTM_NEWROUTE:
320
321 lwsl_cx_netlink(cx, "NEWROUTE rtm_type %d",
322 rm->rtm_type);
323
324 /*
325 * We don't want any routing debris like /32 or broadcast
326 * in our routing table... we will collect source addresses
327 * bound to interfaces via NEWADDR
328 */
329
330 if (rm->rtm_type != RTN_UNICAST &&
331 rm->rtm_type != RTN_LOCAL)
332 break;
333
334 if (rm->rtm_flags & RTM_F_CLONED)
335 break;
336
337 goto ana;
338
339 case RTM_DELADDR:
340 lwsl_cx_notice(cx, "DELADDR");
341 #if defined(_DEBUG)
342 _lws_routing_entry_dump(cx, &robj);
343 #endif
344 lws_pt_lock(pt, __func__);
345 _lws_route_remove(pt, &robj, LRR_MATCH_SRC | LRR_IGNORE_PRI);
346 _lws_route_pt_close_unroutable(pt);
347 lws_pt_unlock(pt);
348 break;
349
350 case RTM_NEWADDR:
351
352 lwsl_cx_netlink(cx, "NEWADDR");
353 ana:
354
355 /*
356 * Is robj a dupe in the routing table already?
357 *
358 * match on pri ignore == set pri and skip
359 * no match == add
360 */
361
362 lws_pt_lock(pt, __func__);
363
364 /* returns zero on match already in table */
365 rmat = _lws_route_remove(pt, &robj, LRR_MATCH_SRC |
366 LRR_JUST_CHECK |
367 LRR_IGNORE_PRI);
368 lws_pt_unlock(pt);
369
370 if (rmat) {
371 rmat->priority = robj.priority;
372 break;
373 }
374
375 rou = lws_malloc(sizeof(*rou), __func__);
376 if (!rou) {
377 lwsl_cx_err(cx, "oom");
378 return LWS_HPI_RET_HANDLED;
379 }
380
381 *rou = robj;
382
383 lws_pt_lock(pt, __func__);
384
385 /*
386 * We lock the pt before getting the uidx, so it
387 * cannot race
388 */
389
390 rou->uidx = _lws_route_get_uidx(cx);
391 lws_dll2_add_tail(&rou->list, &cx->routing_table);
392 lwsl_cx_info(cx, "route list size %u",
393 cx->routing_table.count);
394
395 _lws_route_pt_close_unroutable(pt);
396
397 lws_pt_unlock(pt);
398
399 inform:
400 #if defined(_DEBUG)
401 route_change = 1;
402 #endif
403 #if defined(LWS_WITH_SYS_SMD)
404 /*
405 * Reflect the route add / del event using SMD.
406 * Participants interested can refer to the pt
407 * routing table
408 */
409 (void)lws_smd_msg_printf(cx, LWSSMDCL_NETWORK,
410 "{\"rt\":\"%s\"}\n",
411 (h->nlmsg_type == RTM_DELROUTE) ?
412 "del" : "add");
413 #endif
414
415 break;
416
417 default:
418 // lwsl_info("%s: unknown msg type %d\n", __func__,
419 // h->nlmsg_type);
420 break;
421 }
422 } /* message iterator */
423
424 #if defined(LWS_WITH_SYS_SMD)
425 if (gateway_change)
426 /*
427 * If a route with a gw was added or deleted, retrigger captive
428 * portal detection if we have that
429 */
430 (void)lws_smd_msg_printf(cx, LWSSMDCL_NETWORK,
431 "{\"trigger\": \"cpdcheck\", "
432 "\"src\":\"gw-change\"}");
433 #endif
434
435 #if defined(_DEBUG)
436 if (route_change) {
437 lws_context_lock(cx, __func__);
438 _lws_routing_table_dump(cx);
439 lws_context_unlock(cx);
440 }
441 #endif
442
443 if (!cx->nl_initial_done &&
444 pt == &cx->pt[0] &&
445 cx->routing_table.count) {
446 /*
447 * While netlink info still coming, keep moving the timer for
448 * calling it "done" to +100ms until after it stops coming
449 */
450 lws_context_lock(cx, __func__);
451 lws_sul_schedule(cx, 0, &cx->sul_nl_coldplug,
452 lws_netlink_coldplug_done_cb,
453 100 * LWS_US_PER_MS);
454 lws_context_unlock(cx);
455 }
456
457 return LWS_HPI_RET_HANDLED;
458 }
459
/*
 * Wire layout of the rtnetlink request sent at init to ask for a dump of the
 * existing routes: the generic netlink header immediately followed by the
 * rtmsg payload.
 */
struct nl_req_s {
	struct nlmsghdr hdr;	/* netlink message header */
	struct rtmsg gen;	/* route message body that follows the header */
};
464
465 int
rops_pt_init_destroy_netlink(struct lws_context * context,const struct lws_context_creation_info * info,struct lws_context_per_thread * pt,int destroy)466 rops_pt_init_destroy_netlink(struct lws_context *context,
467 const struct lws_context_creation_info *info,
468 struct lws_context_per_thread *pt, int destroy)
469 {
470 struct sockaddr_nl sanl;
471 struct nl_req_s req;
472 struct msghdr msg;
473 struct iovec iov;
474 struct lws *wsi;
475 int n, ret = 1;
476
477 if (destroy) {
478
479 /*
480 * pt netlink wsi closed + freed as part of pt's destroy
481 * wsi mass close, just need to take down the routing table
482 */
483 _lws_route_table_empty(pt);
484
485 return 0;
486 }
487
488 if (context->netlink)
489 return 0;
490
491 if (pt > &context->pt[0])
492 /* we can only have one netlink socket */
493 return 0;
494
495 lwsl_cx_info(context, "creating netlink skt");
496
497 /*
498 * We want a netlink socket per pt as well
499 */
500
501 lws_context_lock(context, __func__);
502 wsi = __lws_wsi_create_with_role(context, (int)(pt - &context->pt[0]),
503 &role_ops_netlink, NULL);
504 lws_context_unlock(context);
505 if (!wsi)
506 goto bail;
507
508 wsi->desc.sockfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
509 if (wsi->desc.sockfd == LWS_SOCK_INVALID) {
510 lwsl_cx_err(context, "unable to open netlink");
511 goto bail1;
512 }
513
514 lws_plat_set_nonblocking(wsi->desc.sockfd);
515
516 __lws_lc_tag(context, &context->lcg[LWSLCG_VHOST], &wsi->lc,
517 "netlink");
518
519 memset(&sanl, 0, sizeof(sanl));
520 sanl.nl_family = AF_NETLINK;
521 sanl.nl_pid = (uint32_t)getpid();
522 sanl.nl_groups = (1 << (RTNLGRP_LINK - 1)) |
523 (1 << (RTNLGRP_IPV4_ROUTE - 1)) |
524 (1 << (RTNLGRP_IPV4_IFADDR - 1))
525 #if defined(LWS_WITH_IPV6)
526 | (1 << (RTNLGRP_IPV6_ROUTE - 1)) |
527 (1 << (RTNLGRP_IPV6_IFADDR - 1))
528 #endif
529 ;
530
531 if (lws_fi(&context->fic, "netlink_bind") ||
532 bind(wsi->desc.sockfd, (struct sockaddr*)&sanl, sizeof(sanl)) < 0) {
533 lwsl_cx_warn(context, "netlink bind failed");
534 ret = 0; /* some systems deny access, just ignore */
535 goto bail2;
536 }
537
538 context->netlink = wsi;
539 if (lws_wsi_inject_to_loop(pt, wsi))
540 goto bail2;
541
542 /* if (lws_change_pollfd(wsi, 0, LWS_POLLIN)) {
543 lwsl_err("%s: pollfd in fail\n", __func__);
544 goto bail2;
545 }
546 */
547 /*
548 * Since we're starting the PT, ask to be sent all the existing routes.
549 *
550 * This requires CAP_ADMIN, or root... we do this early before dropping
551 * privs
552 */
553
554 memset(&sanl, 0, sizeof(sanl));
555 memset(&msg, 0, sizeof(msg));
556 memset(&req, 0, sizeof(req));
557
558 sanl.nl_family = AF_NETLINK;
559
560 req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.gen));
561 req.hdr.nlmsg_type = RTM_GETROUTE;
562 req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
563 req.hdr.nlmsg_seq = 1;
564 req.hdr.nlmsg_pid = (uint32_t)getpid();
565 req.gen.rtm_family = AF_PACKET;
566 req.gen.rtm_table = RT_TABLE_DEFAULT;
567
568 iov.iov_base = &req;
569 iov.iov_len = req.hdr.nlmsg_len;
570 msg.msg_iov = &iov;
571 msg.msg_iovlen = 1;
572 msg.msg_name = &sanl;
573 msg.msg_namelen = sizeof(sanl);
574
575 n = (int)sendmsg(wsi->desc.sockfd, (struct msghdr *)&msg, 0);
576 if (n < 0) {
577 lwsl_cx_notice(context, "rt dump req failed... permissions? errno %d",
578 LWS_ERRNO);
579 }
580
581 /*
582 * Responses are going to come asynchronously, let's block moving
583 * off state IFACE_COLDPLUG until we have had them. This is important
584 * since if we don't hold there, when we do get the responses we may
585 * cull any ongoing connections as unroutable otherwise
586 */
587
588 lwsl_cx_debug(context, "starting netlink coldplug wait");
589
590 return 0;
591
592 bail2:
593 __lws_lc_untag(wsi->a.context, &wsi->lc);
594 compatible_close(wsi->desc.sockfd);
595 bail1:
596 lws_free(wsi);
597 bail:
598 return ret;
599 }
600
/*
 * The role operations this role actually implements.  The numbers in the
 * leading comments are the values used for these entries in
 * role_ops_netlink's rops_idx initializer (0x01 and 0x02 there).
 */
static const lws_rops_t rops_table_netlink[] = {
	/*  1 */ { .pt_init_destroy	= rops_pt_init_destroy_netlink },
	/*  2 */ { .handle_POLLIN	= rops_handle_POLLIN_netlink },
};
605
/*
 * Role definition for the netlink monitoring socket.
 *
 * Only pt_init_destroy and handle_POLLIN have nonzero entries in rops_idx;
 * every other operation and all of the callback reason pairs are left at 0.
 * NOTE(review): each rops_idx byte appears to pack two operations (the two
 * names commented above it), with the value selecting an entry in
 * rops_table_netlink -- confirm against the lws_role_ops definition.
 */
const struct lws_role_ops role_ops_netlink = {
	/* role name */			"netlink",
	/* alpn id */			NULL,

	/* rops_table */		rops_table_netlink,
	/* rops_idx */			{
	  /* LWS_ROPS_check_upgrades */
	  /* LWS_ROPS_pt_init_destroy */		0x01,
	  /* LWS_ROPS_init_vhost */
	  /* LWS_ROPS_destroy_vhost */			0x00,
	  /* LWS_ROPS_service_flag_pending */
	  /* LWS_ROPS_handle_POLLIN */			0x02,
	  /* LWS_ROPS_handle_POLLOUT */
	  /* LWS_ROPS_perform_user_POLLOUT */		0x00,
	  /* LWS_ROPS_callback_on_writable */
	  /* LWS_ROPS_tx_credit */			0x00,
	  /* LWS_ROPS_write_role_protocol */
	  /* LWS_ROPS_encapsulation_parent */		0x00,
	  /* LWS_ROPS_alpn_negotiated */
	  /* LWS_ROPS_close_via_role_protocol */	0x00,
	  /* LWS_ROPS_close_role */
	  /* LWS_ROPS_close_kill_connection */		0x00,
	  /* LWS_ROPS_destroy_role */
	  /* LWS_ROPS_adoption_bind */			0x00,
	  /* LWS_ROPS_client_bind */
	  /* LWS_ROPS_issue_keepalive */		0x00,
					},

	/* adoption_cb clnt, srv */	{ 0, 0 },
	/* rx_cb clnt, srv */		{ 0, 0 },
	/* writeable cb clnt, srv */	{ 0, 0 },
	/* close cb clnt, srv */	{ 0, 0 },
	/* protocol_bind_cb c,s */	{ 0, 0 },
	/* protocol_unbind_cb c,s */	{ 0, 0 },
	/* file_handle */		0,
};
642