1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in the TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 struct dst_entry *dst = skb_dst(skb);
106
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110 rcu_assign_pointer(sk->sk_rx_dst, dst);
111 sk->sk_rx_dst_ifindex = skb->skb_iif;
112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113 }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->dest,
121 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 int addr_len)
132 {
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent the BPF program called below from accessing bytes that are out
135 * of the bounds specified by the user in addr_len.
136 */
137 if (addr_len < SIN6_LEN_RFC2133)
138 return -EINVAL;
139
140 sock_owned_by_me(sk);
141
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 int addr_len)
147 {
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct in6_addr *saddr = NULL, *final_p, final;
151 struct inet_timewait_death_row *tcp_death_row;
152 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153 struct inet_sock *inet = inet_sk(sk);
154 struct tcp_sock *tp = tcp_sk(sk);
155 struct net *net = sock_net(sk);
156 struct ipv6_txoptions *opt;
157 struct dst_entry *dst;
158 struct flowi6 fl6;
159 int addr_type;
160 int err;
161
162 if (addr_len < SIN6_LEN_RFC2133)
163 return -EINVAL;
164
165 if (usin->sin6_family != AF_INET6)
166 return -EAFNOSUPPORT;
167
168 memset(&fl6, 0, sizeof(fl6));
169
170 if (np->sndflow) {
171 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
172 IP6_ECN_flow_init(fl6.flowlabel);
173 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
174 struct ip6_flowlabel *flowlabel;
175 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176 if (IS_ERR(flowlabel))
177 return -EINVAL;
178 fl6_sock_release(flowlabel);
179 }
180 }
181
182 /*
183 * connect() to INADDR_ANY means loopback (BSD'ism).
184 */
185
186 if (ipv6_addr_any(&usin->sin6_addr)) {
187 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
188 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189 &usin->sin6_addr);
190 else
191 usin->sin6_addr = in6addr_loopback;
192 }
193
194 addr_type = ipv6_addr_type(&usin->sin6_addr);
195
196 if (addr_type & IPV6_ADDR_MULTICAST)
197 return -ENETUNREACH;
198
199 if (addr_type&IPV6_ADDR_LINKLOCAL) {
200 if (addr_len >= sizeof(struct sockaddr_in6) &&
201 usin->sin6_scope_id) {
202 /* If interface is set while binding, indices
203 * must coincide.
204 */
205 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206 return -EINVAL;
207
208 sk->sk_bound_dev_if = usin->sin6_scope_id;
209 }
210
211 /* Connect to link-local address requires an interface */
212 if (!sk->sk_bound_dev_if)
213 return -EINVAL;
214 }
215
216 if (tp->rx_opt.ts_recent_stamp &&
217 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218 tp->rx_opt.ts_recent = 0;
219 tp->rx_opt.ts_recent_stamp = 0;
220 WRITE_ONCE(tp->write_seq, 0);
221 }
222
223 sk->sk_v6_daddr = usin->sin6_addr;
224 np->flow_label = fl6.flowlabel;
225
226 /*
227 * TCP over IPv4
228 */
229
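/* A v4-mapped destination (::ffff:a.b.c.d) means this is really an IPv4
 * connection: switch icsk_af_ops and the backlog handler to the mapped
 * variants and hand the connect off to tcp_v4_connect().
 */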
230 if (addr_type & IPV6_ADDR_MAPPED) {
231 u32 exthdrlen = icsk->icsk_ext_hdr_len;
232 struct sockaddr_in sin;
233
234 if (ipv6_only_sock(sk))
235 return -ENETUNREACH;
236
237 sin.sin_family = AF_INET;
238 sin.sin_port = usin->sin6_port;
239 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240
241 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
242 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
243 if (sk_is_mptcp(sk))
244 mptcpv6_handle_mapped(sk, true);
245 sk->sk_backlog_rcv = tcp_v4_do_rcv;
246 #ifdef CONFIG_TCP_MD5SIG
247 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 #endif
249
250 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251
252 if (err) {
253 icsk->icsk_ext_hdr_len = exthdrlen;
254 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
255 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
256 if (sk_is_mptcp(sk))
257 mptcpv6_handle_mapped(sk, false);
258 sk->sk_backlog_rcv = tcp_v6_do_rcv;
259 #ifdef CONFIG_TCP_MD5SIG
260 tp->af_specific = &tcp_sock_ipv6_specific;
261 #endif
262 goto failure;
263 }
264 np->saddr = sk->sk_v6_rcv_saddr;
265
266 return err;
267 }
268
269 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
270 saddr = &sk->sk_v6_rcv_saddr;
271
272 fl6.flowi6_proto = IPPROTO_TCP;
273 fl6.daddr = sk->sk_v6_daddr;
274 fl6.saddr = saddr ? *saddr : np->saddr;
275 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
276 fl6.flowi6_oif = sk->sk_bound_dev_if;
277 fl6.flowi6_mark = sk->sk_mark;
278 fl6.fl6_dport = usin->sin6_port;
279 fl6.fl6_sport = inet->inet_sport;
280 fl6.flowi6_uid = sk->sk_uid;
281
282 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
283 final_p = fl6_update_dst(&fl6, opt, &final);
284
285 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
286
287 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
288 if (IS_ERR(dst)) {
289 err = PTR_ERR(dst);
290 goto failure;
291 }
292
293 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
294
295 if (!saddr) {
296 saddr = &fl6.saddr;
297
298 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
299 if (err)
300 goto failure;
301 }
302
303 /* set the source address */
304 np->saddr = *saddr;
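/* LOOPBACK4_IPV6 (127.0.0.6) is a placeholder marking the otherwise unused
 * IPv4 address fields of an AF_INET6 socket.
 */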
305 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
306
307 sk->sk_gso_type = SKB_GSO_TCPV6;
308 ip6_dst_store(sk, dst, NULL, NULL);
309
310 icsk->icsk_ext_hdr_len = 0;
311 if (opt)
312 icsk->icsk_ext_hdr_len = opt->opt_flen +
313 opt->opt_nflen;
314
315 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
316
317 inet->inet_dport = usin->sin6_port;
318
319 tcp_set_state(sk, TCP_SYN_SENT);
320 err = inet6_hash_connect(tcp_death_row, sk);
321 if (err)
322 goto late_failure;
323
324 sk_set_txhash(sk);
325
326 if (likely(!tp->repair)) {
327 if (!tp->write_seq)
328 WRITE_ONCE(tp->write_seq,
329 secure_tcpv6_seq(np->saddr.s6_addr32,
330 sk->sk_v6_daddr.s6_addr32,
331 inet->inet_sport,
332 inet->inet_dport));
333 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
334 sk->sk_v6_daddr.s6_addr32);
335 }
336
337 if (tcp_fastopen_defer_connect(sk, &err))
338 return err;
339 if (err)
340 goto late_failure;
341
342 err = tcp_connect(sk);
343 if (err)
344 goto late_failure;
345
346 return 0;
347
348 late_failure:
349 tcp_set_state(sk, TCP_CLOSE);
350 inet_bhash2_reset_saddr(sk);
351 failure:
352 inet->inet_dport = 0;
353 sk->sk_route_caps = 0;
354 return err;
355 }
356
357 static void tcp_v6_mtu_reduced(struct sock *sk)
358 {
359 struct dst_entry *dst;
360 u32 mtu;
361
362 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
363 return;
364
365 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
366
367 /* Drop requests trying to increase our current mss.
368 * The check done in __ip6_rt_update_pmtu() is too late.
369 */
370 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
371 return;
372
373 dst = inet6_csk_update_pmtu(sk, mtu);
374 if (!dst)
375 return;
376
377 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
378 tcp_sync_mss(sk, dst_mtu(dst));
379 tcp_simple_retransmit(sk);
380 }
381 }
382
383 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
384 u8 type, u8 code, int offset, __be32 info)
385 {
386 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
387 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
388 struct net *net = dev_net(skb->dev);
389 struct request_sock *fastopen;
390 struct ipv6_pinfo *np;
391 struct tcp_sock *tp;
392 __u32 seq, snd_una;
393 struct sock *sk;
394 bool fatal;
395 int err;
396
397 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
398 &hdr->daddr, th->dest,
399 &hdr->saddr, ntohs(th->source),
400 skb->dev->ifindex, inet6_sdif(skb));
401
402 if (!sk) {
403 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
404 ICMP6_MIB_INERRORS);
405 return -ENOENT;
406 }
407
408 if (sk->sk_state == TCP_TIME_WAIT) {
409 inet_twsk_put(inet_twsk(sk));
410 return 0;
411 }
412 seq = ntohl(th->seq);
413 fatal = icmpv6_err_convert(type, code, &err);
414 if (sk->sk_state == TCP_NEW_SYN_RECV) {
415 tcp_req_err(sk, seq, fatal);
416 return 0;
417 }
418
419 bh_lock_sock(sk);
420 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
421 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
422
423 if (sk->sk_state == TCP_CLOSE)
424 goto out;
425
426 if (static_branch_unlikely(&ip6_min_hopcount)) {
427 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
428 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
429 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
430 goto out;
431 }
432 }
433
434 tp = tcp_sk(sk);
435 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
436 fastopen = rcu_dereference(tp->fastopen_rsk);
437 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
438 if (sk->sk_state != TCP_LISTEN &&
439 !between(seq, snd_una, tp->snd_nxt)) {
440 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
441 goto out;
442 }
443
444 np = tcp_inet6_sk(sk);
445
446 if (type == NDISC_REDIRECT) {
447 if (!sock_owned_by_user(sk)) {
448 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
449
450 if (dst)
451 dst->ops->redirect(dst, sk, skb);
452 }
453 goto out;
454 }
455
456 if (type == ICMPV6_PKT_TOOBIG) {
457 u32 mtu = ntohl(info);
458
459 /* We are not interested in TCP_LISTEN and open_requests
460 * (SYN-ACKs sent out by Linux are always <576 bytes, so
461 * they should go through unfragmented).
462 */
463 if (sk->sk_state == TCP_LISTEN)
464 goto out;
465
466 if (!ip6_sk_accept_pmtu(sk))
467 goto out;
468
469 if (mtu < IPV6_MIN_MTU)
470 goto out;
471
472 WRITE_ONCE(tp->mtu_info, mtu);
473
474 if (!sock_owned_by_user(sk))
475 tcp_v6_mtu_reduced(sk);
476 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
477 &sk->sk_tsq_flags))
478 sock_hold(sk);
479 goto out;
480 }
481
482
483 /* Might be for a request_sock */
484 switch (sk->sk_state) {
485 case TCP_SYN_SENT:
486 case TCP_SYN_RECV:
487 /* Only in fast or simultaneous open. If a fast open socket is
488 * already accepted it is treated as a connected one below.
489 */
490 if (fastopen && !fastopen->sk)
491 break;
492
493 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
494
495 if (!sock_owned_by_user(sk)) {
496 sk->sk_err = err;
497 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
498
499 tcp_done(sk);
500 } else
501 sk->sk_err_soft = err;
502 goto out;
503 case TCP_LISTEN:
504 break;
505 default:
506 /* check if this ICMP message allows revert of backoff.
507 * (see RFC 6069)
508 */
509 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
510 code == ICMPV6_NOROUTE)
511 tcp_ld_RTO_revert(sk, seq);
512 }
513
514 if (!sock_owned_by_user(sk) && np->recverr) {
515 sk->sk_err = err;
516 sk_error_report(sk);
517 } else
518 sk->sk_err_soft = err;
519
520 out:
521 bh_unlock_sock(sk);
522 sock_put(sk);
523 return 0;
524 }
525
526
527 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
528 struct flowi *fl,
529 struct request_sock *req,
530 struct tcp_fastopen_cookie *foc,
531 enum tcp_synack_type synack_type,
532 struct sk_buff *syn_skb)
533 {
534 struct inet_request_sock *ireq = inet_rsk(req);
535 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
536 struct ipv6_txoptions *opt;
537 struct flowi6 *fl6 = &fl->u.ip6;
538 struct sk_buff *skb;
539 int err = -ENOMEM;
540 u8 tclass;
541
542 /* First, grab a route. */
543 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
544 IPPROTO_TCP)) == NULL)
545 goto done;
546
547 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
548
549 if (skb) {
550 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
551 &ireq->ir_v6_rmt_addr);
552
553 fl6->daddr = ireq->ir_v6_rmt_addr;
554 if (np->repflow && ireq->pktopts)
555 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
556
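/* When sysctl_tcp_reflect_tos is enabled, echo the DSCP bits of the
 * incoming SYN while keeping the listener's own ECN bits.
 */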
557 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
558 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
559 (np->tclass & INET_ECN_MASK) :
560 np->tclass;
561
562 if (!INET_ECN_is_capable(tclass) &&
563 tcp_bpf_ca_needs_ecn((struct sock *)req))
564 tclass |= INET_ECN_ECT_0;
565
566 rcu_read_lock();
567 opt = ireq->ipv6_opt;
568 if (!opt)
569 opt = rcu_dereference(np->opt);
570 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
571 opt, tclass, sk->sk_priority);
572 rcu_read_unlock();
573 err = net_xmit_eval(err);
574 }
575
576 done:
577 return err;
578 }
579
580
581 static void tcp_v6_reqsk_destructor(struct request_sock *req)
582 {
583 kfree(inet_rsk(req)->ipv6_opt);
584 consume_skb(inet_rsk(req)->pktopts);
585 }
586
587 #ifdef CONFIG_TCP_MD5SIG
588 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
589 const struct in6_addr *addr,
590 int l3index)
591 {
592 return tcp_md5_do_lookup(sk, l3index,
593 (union tcp_md5_addr *)addr, AF_INET6);
594 }
595
596 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
597 const struct sock *addr_sk)
598 {
599 int l3index;
600
601 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
602 addr_sk->sk_bound_dev_if);
603 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
604 l3index);
605 }
606
607 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
608 sockptr_t optval, int optlen)
609 {
610 struct tcp_md5sig cmd;
611 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
612 int l3index = 0;
613 u8 prefixlen;
614 u8 flags;
615
616 if (optlen < sizeof(cmd))
617 return -EINVAL;
618
619 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
620 return -EFAULT;
621
622 if (sin6->sin6_family != AF_INET6)
623 return -EINVAL;
624
625 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
626
627 if (optname == TCP_MD5SIG_EXT &&
628 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
629 prefixlen = cmd.tcpm_prefixlen;
630 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
631 prefixlen > 32))
632 return -EINVAL;
633 } else {
634 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
635 }
636
637 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
638 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
639 struct net_device *dev;
640
641 rcu_read_lock();
642 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
643 if (dev && netif_is_l3_master(dev))
644 l3index = dev->ifindex;
645 rcu_read_unlock();
646
647 /* ok to reference set/not set outside of rcu;
648 * right now device MUST be an L3 master
649 */
650 if (!dev || !l3index)
651 return -EINVAL;
652 }
653
654 if (!cmd.tcpm_keylen) {
655 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
656 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
657 AF_INET, prefixlen,
658 l3index, flags);
659 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
660 AF_INET6, prefixlen, l3index, flags);
661 }
662
663 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
664 return -EINVAL;
665
666 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
667 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
668 AF_INET, prefixlen, l3index, flags,
669 cmd.tcpm_key, cmd.tcpm_keylen,
670 GFP_KERNEL);
671
672 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
673 AF_INET6, prefixlen, l3index, flags,
674 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
675 }
676
677 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
678 const struct in6_addr *daddr,
679 const struct in6_addr *saddr,
680 const struct tcphdr *th, int nbytes)
681 {
682 struct tcp6_pseudohdr *bp;
683 struct scatterlist sg;
684 struct tcphdr *_th;
685
686 bp = hp->scratch;
687 /* 1. TCP pseudo-header (RFC2460) */
688 bp->saddr = *saddr;
689 bp->daddr = *daddr;
690 bp->protocol = cpu_to_be32(IPPROTO_TCP);
691 bp->len = cpu_to_be32(nbytes);
692
693 _th = (struct tcphdr *)(bp + 1);
694 memcpy(_th, th, sizeof(*th));
695 _th->check = 0;
696
697 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
698 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
699 sizeof(*bp) + sizeof(*th));
700 return crypto_ahash_update(hp->md5_req);
701 }
702
703 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
704 const struct in6_addr *daddr, struct in6_addr *saddr,
705 const struct tcphdr *th)
706 {
707 struct tcp_md5sig_pool *hp;
708 struct ahash_request *req;
709
710 hp = tcp_get_md5sig_pool();
711 if (!hp)
712 goto clear_hash_noput;
713 req = hp->md5_req;
714
715 if (crypto_ahash_init(req))
716 goto clear_hash;
717 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
718 goto clear_hash;
719 if (tcp_md5_hash_key(hp, key))
720 goto clear_hash;
721 ahash_request_set_crypt(req, NULL, md5_hash, 0);
722 if (crypto_ahash_final(req))
723 goto clear_hash;
724
725 tcp_put_md5sig_pool();
726 return 0;
727
728 clear_hash:
729 tcp_put_md5sig_pool();
730 clear_hash_noput:
731 memset(md5_hash, 0, 16);
732 return 1;
733 }
734
735 static int tcp_v6_md5_hash_skb(char *md5_hash,
736 const struct tcp_md5sig_key *key,
737 const struct sock *sk,
738 const struct sk_buff *skb)
739 {
740 const struct in6_addr *saddr, *daddr;
741 struct tcp_md5sig_pool *hp;
742 struct ahash_request *req;
743 const struct tcphdr *th = tcp_hdr(skb);
744
745 if (sk) { /* valid for establish/request sockets */
746 saddr = &sk->sk_v6_rcv_saddr;
747 daddr = &sk->sk_v6_daddr;
748 } else {
749 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
750 saddr = &ip6h->saddr;
751 daddr = &ip6h->daddr;
752 }
753
754 hp = tcp_get_md5sig_pool();
755 if (!hp)
756 goto clear_hash_noput;
757 req = hp->md5_req;
758
759 if (crypto_ahash_init(req))
760 goto clear_hash;
761
762 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
763 goto clear_hash;
764 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
765 goto clear_hash;
766 if (tcp_md5_hash_key(hp, key))
767 goto clear_hash;
768 ahash_request_set_crypt(req, NULL, md5_hash, 0);
769 if (crypto_ahash_final(req))
770 goto clear_hash;
771
772 tcp_put_md5sig_pool();
773 return 0;
774
775 clear_hash:
776 tcp_put_md5sig_pool();
777 clear_hash_noput:
778 memset(md5_hash, 0, 16);
779 return 1;
780 }
781
782 #endif
783
784 static void tcp_v6_init_req(struct request_sock *req,
785 const struct sock *sk_listener,
786 struct sk_buff *skb)
787 {
788 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
789 struct inet_request_sock *ireq = inet_rsk(req);
790 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
791
792 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
793 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
794
795 /* So that link locals have meaning */
796 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
797 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
798 ireq->ir_iif = tcp_v6_iif(skb);
799
800 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
801 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
802 np->rxopt.bits.rxinfo ||
803 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
804 np->rxopt.bits.rxohlim || np->repflow)) {
805 refcount_inc(&skb->users);
806 ireq->pktopts = skb;
807 }
808 }
809
810 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
811 struct sk_buff *skb,
812 struct flowi *fl,
813 struct request_sock *req)
814 {
815 tcp_v6_init_req(req, sk, skb);
816
817 if (security_inet_conn_request(sk, skb, req))
818 return NULL;
819
820 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
821 }
822
823 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
824 .family = AF_INET6,
825 .obj_size = sizeof(struct tcp6_request_sock),
826 .rtx_syn_ack = tcp_rtx_synack,
827 .send_ack = tcp_v6_reqsk_send_ack,
828 .destructor = tcp_v6_reqsk_destructor,
829 .send_reset = tcp_v6_send_reset,
830 .syn_ack_timeout = tcp_syn_ack_timeout,
831 };
832
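/* AF-specific request_sock callbacks used by the generic tcp_conn_request()
 * path for IPv6 listeners.
 */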
833 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
834 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
835 sizeof(struct ipv6hdr),
836 #ifdef CONFIG_TCP_MD5SIG
837 .req_md5_lookup = tcp_v6_md5_lookup,
838 .calc_md5_hash = tcp_v6_md5_hash_skb,
839 #endif
840 #ifdef CONFIG_SYN_COOKIES
841 .cookie_init_seq = cookie_v6_init_sequence,
842 #endif
843 .route_req = tcp_v6_route_req,
844 .init_seq = tcp_v6_init_seq,
845 .init_ts_off = tcp_v6_init_ts_off,
846 .send_synack = tcp_v6_send_synack,
847 };
848
849 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
850 u32 ack, u32 win, u32 tsval, u32 tsecr,
851 int oif, struct tcp_md5sig_key *key, int rst,
852 u8 tclass, __be32 label, u32 priority, u32 txhash)
853 {
854 const struct tcphdr *th = tcp_hdr(skb);
855 struct tcphdr *t1;
856 struct sk_buff *buff;
857 struct flowi6 fl6;
858 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
859 struct sock *ctl_sk = net->ipv6.tcp_sk;
860 unsigned int tot_len = sizeof(struct tcphdr);
861 __be32 mrst = 0, *topt;
862 struct dst_entry *dst;
863 __u32 mark = 0;
864
865 if (tsecr)
866 tot_len += TCPOLEN_TSTAMP_ALIGNED;
867 #ifdef CONFIG_TCP_MD5SIG
868 if (key)
869 tot_len += TCPOLEN_MD5SIG_ALIGNED;
870 #endif
871
872 #ifdef CONFIG_MPTCP
873 if (rst && !key) {
874 mrst = mptcp_reset_option(skb);
875
876 if (mrst)
877 tot_len += sizeof(__be32);
878 }
879 #endif
880
881 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
882 if (!buff)
883 return;
884
885 skb_reserve(buff, MAX_TCP_HEADER);
886
887 t1 = skb_push(buff, tot_len);
888 skb_reset_transport_header(buff);
889
890 /* Swap the send and the receive. */
891 memset(t1, 0, sizeof(*t1));
892 t1->dest = th->source;
893 t1->source = th->dest;
894 t1->doff = tot_len / 4;
895 t1->seq = htonl(seq);
896 t1->ack_seq = htonl(ack);
897 t1->ack = !rst || !th->ack;
898 t1->rst = rst;
899 t1->window = htons(win);
900
901 topt = (__be32 *)(t1 + 1);
902
903 if (tsecr) {
904 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
905 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
906 *topt++ = htonl(tsval);
907 *topt++ = htonl(tsecr);
908 }
909
910 if (mrst)
911 *topt++ = mrst;
912
913 #ifdef CONFIG_TCP_MD5SIG
914 if (key) {
915 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
916 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
917 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
918 &ipv6_hdr(skb)->saddr,
919 &ipv6_hdr(skb)->daddr, t1);
920 }
921 #endif
922
923 memset(&fl6, 0, sizeof(fl6));
924 fl6.daddr = ipv6_hdr(skb)->saddr;
925 fl6.saddr = ipv6_hdr(skb)->daddr;
926 fl6.flowlabel = label;
927
928 buff->ip_summed = CHECKSUM_PARTIAL;
929
930 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
931
932 fl6.flowi6_proto = IPPROTO_TCP;
933 if (rt6_need_strict(&fl6.daddr) && !oif)
934 fl6.flowi6_oif = tcp_v6_iif(skb);
935 else {
936 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
937 oif = skb->skb_iif;
938
939 fl6.flowi6_oif = oif;
940 }
941
942 if (sk) {
943 if (sk->sk_state == TCP_TIME_WAIT)
944 mark = inet_twsk(sk)->tw_mark;
945 else
946 mark = READ_ONCE(sk->sk_mark);
947 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
948 }
949 if (txhash) {
950 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
951 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
952 }
953 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
954 fl6.fl6_dport = t1->dest;
955 fl6.fl6_sport = t1->source;
956 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
957 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
958
959 /* Always pass a socket to ip6_dst_lookup_flow(), even for an RST.
960 * The underlying function will use it to retrieve the network
961 * namespace.
962 */
963 if (sk && sk->sk_state != TCP_TIME_WAIT)
964 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
965 else
966 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
967 if (!IS_ERR(dst)) {
968 skb_dst_set(buff, dst);
969 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
970 tclass & ~INET_ECN_MASK, priority);
971 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
972 if (rst)
973 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
974 return;
975 }
976
977 kfree_skb(buff);
978 }
979
980 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
981 {
982 const struct tcphdr *th = tcp_hdr(skb);
983 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
984 u32 seq = 0, ack_seq = 0;
985 struct tcp_md5sig_key *key = NULL;
986 #ifdef CONFIG_TCP_MD5SIG
987 const __u8 *hash_location = NULL;
988 unsigned char newhash[16];
989 int genhash;
990 struct sock *sk1 = NULL;
991 #endif
992 __be32 label = 0;
993 u32 priority = 0;
994 struct net *net;
995 u32 txhash = 0;
996 int oif = 0;
997
998 if (th->rst)
999 return;
1000
1001 /* If sk not NULL, it means we did a successful lookup and incoming
1002 * route had to be correct. prequeue might have dropped our dst.
1003 */
1004 if (!sk && !ipv6_unicast_destination(skb))
1005 return;
1006
1007 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1008 #ifdef CONFIG_TCP_MD5SIG
1009 rcu_read_lock();
1010 hash_location = tcp_parse_md5sig_option(th);
1011 if (sk && sk_fullsock(sk)) {
1012 int l3index;
1013
1014 /* sdif set means the packet ingressed via a device
1015 * in an L3 domain and inet_iif is set to it.
1016 */
1017 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1018 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1019 } else if (hash_location) {
1020 int dif = tcp_v6_iif_l3_slave(skb);
1021 int sdif = tcp_v6_sdif(skb);
1022 int l3index;
1023
1024 /*
1025 * The active side is lost. Try to find a listening socket through
1026 * the source port, and then find the md5 key through that listening socket.
1027 * We do not loosen security here:
1028 * the incoming packet is checked against the md5 hash computed with the key found,
1029 * and no RST is generated if the md5 hash doesn't match.
1030 */
1031 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1032 NULL, 0, &ipv6h->saddr, th->source,
1033 &ipv6h->daddr, ntohs(th->source),
1034 dif, sdif);
1035 if (!sk1)
1036 goto out;
1037
1038 /* sdif set means the packet ingressed via a device
1039 * in an L3 domain and dif is set to it.
1040 */
1041 l3index = tcp_v6_sdif(skb) ? dif : 0;
1042
1043 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1044 if (!key)
1045 goto out;
1046
1047 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1048 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1049 goto out;
1050 }
1051 #endif
1052
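/* Per RFC 793 reset generation: if the offending segment carries an ACK,
 * the RST takes its sequence number from that ACK; otherwise SEQ is 0 and
 * we acknowledge everything the segment covered.
 */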
1053 if (th->ack)
1054 seq = ntohl(th->ack_seq);
1055 else
1056 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1057 (th->doff << 2);
1058
1059 if (sk) {
1060 oif = sk->sk_bound_dev_if;
1061 if (sk_fullsock(sk)) {
1062 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1063
1064 trace_tcp_send_reset(sk, skb);
1065 if (np->repflow)
1066 label = ip6_flowlabel(ipv6h);
1067 priority = sk->sk_priority;
1068 txhash = sk->sk_txhash;
1069 }
1070 if (sk->sk_state == TCP_TIME_WAIT) {
1071 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1072 priority = inet_twsk(sk)->tw_priority;
1073 txhash = inet_twsk(sk)->tw_txhash;
1074 }
1075 } else {
1076 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1077 label = ip6_flowlabel(ipv6h);
1078 }
1079
1080 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1081 ipv6_get_dsfield(ipv6h), label, priority, txhash);
1082
1083 #ifdef CONFIG_TCP_MD5SIG
1084 out:
1085 rcu_read_unlock();
1086 #endif
1087 }
1088
1089 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1090 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1091 struct tcp_md5sig_key *key, u8 tclass,
1092 __be32 label, u32 priority, u32 txhash)
1093 {
1094 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1095 tclass, label, priority, txhash);
1096 }
1097
1098 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1099 {
1100 struct inet_timewait_sock *tw = inet_twsk(sk);
1101 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1102
1103 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1104 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1105 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1106 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1107 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1108 tw->tw_txhash);
1109
1110 inet_twsk_put(tw);
1111 }
1112
1113 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1114 struct request_sock *req)
1115 {
1116 int l3index;
1117
1118 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1119
1120 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1121 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1122 */
1123 /* RFC 7323 2.3
1124 * The window field (SEG.WND) of every outgoing segment, with the
1125 * exception of <SYN> segments, MUST be right-shifted by
1126 * Rcv.Wind.Shift bits:
1127 */
1128 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1129 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1130 tcp_rsk(req)->rcv_nxt,
1131 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1132 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1133 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1134 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1135 ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1136 READ_ONCE(sk->sk_priority),
1137 READ_ONCE(tcp_rsk(req)->txhash));
1138 }
1139
1140
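/* On a listener, a non-SYN segment may be the ACK completing a syncookie
 * handshake: validate the cookie and create the child socket if it checks out.
 */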
1141 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1142 {
1143 #ifdef CONFIG_SYN_COOKIES
1144 const struct tcphdr *th = tcp_hdr(skb);
1145
1146 if (!th->syn)
1147 sk = cookie_v6_check(sk, skb);
1148 #endif
1149 return sk;
1150 }
1151
1152 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1153 struct tcphdr *th, u32 *cookie)
1154 {
1155 u16 mss = 0;
1156 #ifdef CONFIG_SYN_COOKIES
1157 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1158 &tcp_request_sock_ipv6_ops, sk, th);
1159 if (mss) {
1160 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1161 tcp_synq_overflow(sk);
1162 }
1163 #endif
1164 return mss;
1165 }
1166
1167 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1168 {
1169 if (skb->protocol == htons(ETH_P_IP))
1170 return tcp_v4_conn_request(sk, skb);
1171
1172 if (!ipv6_unicast_destination(skb))
1173 goto drop;
1174
1175 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1176 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1177 return 0;
1178 }
1179
1180 return tcp_conn_request(&tcp6_request_sock_ops,
1181 &tcp_request_sock_ipv6_ops, sk, skb);
1182
1183 drop:
1184 tcp_listendrop(sk);
1185 return 0; /* don't send reset */
1186 }
1187
1188 static void tcp_v6_restore_cb(struct sk_buff *skb)
1189 {
1190 /* We need to move header back to the beginning if xfrm6_policy_check()
1191 * and tcp_v6_fill_cb() are going to be called again.
1192 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1193 */
1194 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1195 sizeof(struct inet6_skb_parm));
1196 }
1197
1198 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1199 struct request_sock *req,
1200 struct dst_entry *dst,
1201 struct request_sock *req_unhash,
1202 bool *own_req)
1203 {
1204 struct inet_request_sock *ireq;
1205 struct ipv6_pinfo *newnp;
1206 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1207 struct ipv6_txoptions *opt;
1208 struct inet_sock *newinet;
1209 bool found_dup_sk = false;
1210 struct tcp_sock *newtp;
1211 struct sock *newsk;
1212 #ifdef CONFIG_TCP_MD5SIG
1213 struct tcp_md5sig_key *key;
1214 int l3index;
1215 #endif
1216 struct flowi6 fl6;
1217
1218 if (skb->protocol == htons(ETH_P_IP)) {
1219 /*
1220 * v6 mapped
1221 */
1222
1223 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1224 req_unhash, own_req);
1225
1226 if (!newsk)
1227 return NULL;
1228
1229 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1230
1231 newnp = tcp_inet6_sk(newsk);
1232 newtp = tcp_sk(newsk);
1233
1234 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1235
1236 newnp->saddr = newsk->sk_v6_rcv_saddr;
1237
1238 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1239 if (sk_is_mptcp(newsk))
1240 mptcpv6_handle_mapped(newsk, true);
1241 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1242 #ifdef CONFIG_TCP_MD5SIG
1243 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1244 #endif
1245
1246 newnp->ipv6_mc_list = NULL;
1247 newnp->ipv6_ac_list = NULL;
1248 newnp->ipv6_fl_list = NULL;
1249 newnp->pktoptions = NULL;
1250 newnp->opt = NULL;
1251 newnp->mcast_oif = inet_iif(skb);
1252 newnp->mcast_hops = ip_hdr(skb)->ttl;
1253 newnp->rcv_flowinfo = 0;
1254 if (np->repflow)
1255 newnp->flow_label = 0;
1256
1257 /*
1258 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1259 * here, tcp_create_openreq_child now does this for us, see the comment in
1260 * that function for the gory details. -acme
1261 */
1262
1263 /* It is a tricky place. Until this moment IPv4 tcp
1264 worked with the IPv6 icsk.icsk_af_ops.
1265 Sync it now.
1266 */
1267 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1268
1269 return newsk;
1270 }
1271
1272 ireq = inet_rsk(req);
1273
1274 if (sk_acceptq_is_full(sk))
1275 goto out_overflow;
1276
1277 if (!dst) {
1278 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1279 if (!dst)
1280 goto out;
1281 }
1282
1283 newsk = tcp_create_openreq_child(sk, req, skb);
1284 if (!newsk)
1285 goto out_nonewsk;
1286
1287 /*
1288 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1289 * count here, tcp_create_openreq_child now does this for us, see the
1290 * comment in that function for the gory details. -acme
1291 */
1292
1293 newsk->sk_gso_type = SKB_GSO_TCPV6;
1294 ip6_dst_store(newsk, dst, NULL, NULL);
1295 inet6_sk_rx_dst_set(newsk, skb);
1296
1297 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1298
1299 newtp = tcp_sk(newsk);
1300 newinet = inet_sk(newsk);
1301 newnp = tcp_inet6_sk(newsk);
1302
1303 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1304
1305 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1306 newnp->saddr = ireq->ir_v6_loc_addr;
1307 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1308 newsk->sk_bound_dev_if = ireq->ir_iif;
1309
1310 /* Now IPv6 options...
1311
1312 First: no IPv4 options.
1313 */
1314 newinet->inet_opt = NULL;
1315 newnp->ipv6_mc_list = NULL;
1316 newnp->ipv6_ac_list = NULL;
1317 newnp->ipv6_fl_list = NULL;
1318
1319 /* Clone RX bits */
1320 newnp->rxopt.all = np->rxopt.all;
1321
1322 newnp->pktoptions = NULL;
1323 newnp->opt = NULL;
1324 newnp->mcast_oif = tcp_v6_iif(skb);
1325 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1326 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1327 if (np->repflow)
1328 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1329
1330 /* Set ToS of the new socket based upon the value of incoming SYN.
1331 * ECT bits are set later in tcp_init_transfer().
1332 */
1333 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1334 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1335
1336 /* Clone native IPv6 options from listening socket (if any)
1337
1338 Yes, keeping reference count would be much more clever,
1339 but we do one more thing here: reattach optmem
1340 to newsk.
1341 */
1342 opt = ireq->ipv6_opt;
1343 if (!opt)
1344 opt = rcu_dereference(np->opt);
1345 if (opt) {
1346 opt = ipv6_dup_options(newsk, opt);
1347 RCU_INIT_POINTER(newnp->opt, opt);
1348 }
1349 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1350 if (opt)
1351 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1352 opt->opt_flen;
1353
1354 tcp_ca_openreq_child(newsk, dst);
1355
1356 tcp_sync_mss(newsk, dst_mtu(dst));
1357 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1358
1359 tcp_initialize_rcv_mss(newsk);
1360
1361 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1362 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1363
1364 #ifdef CONFIG_TCP_MD5SIG
1365 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1366
1367 /* Copy over the MD5 key from the original socket */
1368 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1369 if (key) {
1370 /* We're using one, so create a matching key
1371 * on the newsk structure. If we fail to get
1372 * memory, then we end up not copying the key
1373 * across. Shucks.
1374 */
1375 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1376 AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1377 sk_gfp_mask(sk, GFP_ATOMIC));
1378 }
1379 #endif
1380
1381 if (__inet_inherit_port(sk, newsk) < 0) {
1382 inet_csk_prepare_forced_close(newsk);
1383 tcp_done(newsk);
1384 goto out;
1385 }
1386 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1387 &found_dup_sk);
1388 if (*own_req) {
1389 tcp_move_syn(newtp, req);
1390
1391 /* Clone pktoptions received with SYN, if we own the req */
1392 if (ireq->pktopts) {
1393 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1394 consume_skb(ireq->pktopts);
1395 ireq->pktopts = NULL;
1396 if (newnp->pktoptions)
1397 tcp_v6_restore_cb(newnp->pktoptions);
1398 }
1399 } else {
1400 if (!req_unhash && found_dup_sk) {
1401 /* This code path should only be executed in the
1402 * syncookie case
1403 */
1404 bh_unlock_sock(newsk);
1405 sock_put(newsk);
1406 newsk = NULL;
1407 }
1408 }
1409
1410 return newsk;
1411
1412 out_overflow:
1413 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1414 out_nonewsk:
1415 dst_release(dst);
1416 out:
1417 tcp_listendrop(sk);
1418 return NULL;
1419 }
1420
1421 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1422 u32));
1423 /* The socket must have its spinlock held when we get
1424 * here, unless it is a TCP_LISTEN socket.
1425 *
1426 * We have a potential double-lock case here, so even when
1427 * doing backlog processing we use the BH locking scheme.
1428 * This is because we cannot sleep with the original spinlock
1429 * held.
1430 */
1431 INDIRECT_CALLABLE_SCOPE
1432 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1433 {
1434 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1435 struct sk_buff *opt_skb = NULL;
1436 enum skb_drop_reason reason;
1437 struct tcp_sock *tp;
1438
1439 /* Imagine: socket is IPv6. IPv4 packet arrives,
1440 goes to the IPv4 receive handler and is backlogged.
1441 From backlog it always goes here. Kerboom...
1442 Fortunately, tcp_rcv_established and rcv_established
1443 handle them correctly, but that is not the case with
1444 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1445 */
1446
1447 if (skb->protocol == htons(ETH_P_IP))
1448 return tcp_v4_do_rcv(sk, skb);
1449
1450 /*
1451 * socket locking is here for SMP purposes as backlog rcv
1452 * is currently called with bh processing disabled.
1453 */
1454
1455 /* Do Stevens' IPV6_PKTOPTIONS.
1456
1457 Yes, guys, it is the only place in our code where we
1458 can make it without affecting IPv4.
1459 The rest of the code is protocol independent,
1460 and I do not like the idea of uglifying IPv4.
1461 
1462 Actually, the whole idea behind IPV6_PKTOPTIONS
1463 does not look very well thought out. For now we latch
1464 the options received in the last packet enqueued
1465 by tcp. Feel free to propose a better solution.
1466 --ANK (980728)
1467 */
1468 if (np->rxopt.all)
1469 opt_skb = skb_clone_and_charge_r(skb, sk);
1470
1471 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1472 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1473 struct dst_entry *dst;
1474
1475 dst = rcu_dereference_protected(sk->sk_rx_dst,
1476 lockdep_sock_is_held(sk));
1477
1478 sock_rps_save_rxhash(sk, skb);
1479 sk_mark_napi_id(sk, skb);
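/* Validate the cached RX dst: drop it if the ingress interface changed or
 * the route's cookie no longer matches.
 */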
1480 if (dst) {
1481 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1482 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1483 dst, sk->sk_rx_dst_cookie) == NULL) {
1484 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1485 dst_release(dst);
1486 }
1487 }
1488
1489 tcp_rcv_established(sk, skb);
1490 if (opt_skb)
1491 goto ipv6_pktoptions;
1492 return 0;
1493 }
1494
1495 if (tcp_checksum_complete(skb))
1496 goto csum_err;
1497
1498 if (sk->sk_state == TCP_LISTEN) {
1499 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1500
1501 if (!nsk)
1502 goto discard;
1503
1504 if (nsk != sk) {
1505 if (tcp_child_process(sk, nsk, skb))
1506 goto reset;
1507 if (opt_skb)
1508 __kfree_skb(opt_skb);
1509 return 0;
1510 }
1511 } else
1512 sock_rps_save_rxhash(sk, skb);
1513
1514 if (tcp_rcv_state_process(sk, skb))
1515 goto reset;
1516 if (opt_skb)
1517 goto ipv6_pktoptions;
1518 return 0;
1519
1520 reset:
1521 tcp_v6_send_reset(sk, skb);
1522 discard:
1523 if (opt_skb)
1524 __kfree_skb(opt_skb);
1525 kfree_skb_reason(skb, reason);
1526 return 0;
1527 csum_err:
1528 reason = SKB_DROP_REASON_TCP_CSUM;
1529 trace_tcp_bad_csum(skb);
1530 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1531 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1532 goto discard;
1533
1534
1535 ipv6_pktoptions:
1536 /* Do you ask, what is it?
1537
1538 1. skb was enqueued by tcp.
1539 2. skb is added to tail of read queue, rather than out of order.
1540 3. socket is not in passive state.
1541 4. Finally, it really contains options which the user wants to receive.
1542 */
1543 tp = tcp_sk(sk);
1544 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1545 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1546 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1547 np->mcast_oif = tcp_v6_iif(opt_skb);
1548 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1549 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1550 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1551 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1552 if (np->repflow)
1553 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1554 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1555 tcp_v6_restore_cb(opt_skb);
1556 opt_skb = xchg(&np->pktoptions, opt_skb);
1557 } else {
1558 __kfree_skb(opt_skb);
1559 opt_skb = xchg(&np->pktoptions, NULL);
1560 }
1561 }
1562
1563 consume_skb(opt_skb);
1564 return 0;
1565 }
1566
1567 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1568 const struct tcphdr *th)
1569 {
1570 /* This is tricky: we move IP6CB at its correct location into
1571 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1572 * _decode_session6() uses IP6CB().
1573 * barrier() makes sure compiler won't play aliasing games.
1574 */
1575 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1576 sizeof(struct inet6_skb_parm));
1577 barrier();
1578
1579 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1580 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1581 skb->len - th->doff*4);
1582 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1583 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1584 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1585 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1586 TCP_SKB_CB(skb)->sacked = 0;
1587 TCP_SKB_CB(skb)->has_rxtstamp =
1588 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1589 }
1590
1591 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1592 {
1593 enum skb_drop_reason drop_reason;
1594 int sdif = inet6_sdif(skb);
1595 int dif = inet6_iif(skb);
1596 const struct tcphdr *th;
1597 const struct ipv6hdr *hdr;
1598 bool refcounted;
1599 struct sock *sk;
1600 int ret;
1601 struct net *net = dev_net(skb->dev);
1602
1603 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1604 if (skb->pkt_type != PACKET_HOST)
1605 goto discard_it;
1606
1607 /*
1608 * Count it even if it's bad.
1609 */
1610 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1611
1612 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1613 goto discard_it;
1614
1615 th = (const struct tcphdr *)skb->data;
1616
1617 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1618 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1619 goto bad_packet;
1620 }
1621 if (!pskb_may_pull(skb, th->doff*4))
1622 goto discard_it;
1623
1624 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1625 goto csum_error;
1626
1627 th = (const struct tcphdr *)skb->data;
1628 hdr = ipv6_hdr(skb);
1629
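/* Demultiplex: look up an established, time-wait, request, or listening
 * socket for this segment.
 */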
1630 lookup:
1631 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1632 th->source, th->dest, inet6_iif(skb), sdif,
1633 &refcounted);
1634 if (!sk)
1635 goto no_tcp_socket;
1636
1637 process:
1638 if (sk->sk_state == TCP_TIME_WAIT)
1639 goto do_time_wait;
1640
1641 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1642 struct request_sock *req = inet_reqsk(sk);
1643 bool req_stolen = false;
1644 struct sock *nsk;
1645
1646 sk = req->rsk_listener;
1647 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1648 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1649 else
1650 drop_reason = tcp_inbound_md5_hash(sk, skb,
1651 &hdr->saddr, &hdr->daddr,
1652 AF_INET6, dif, sdif);
1653 if (drop_reason) {
1654 sk_drops_add(sk, skb);
1655 reqsk_put(req);
1656 goto discard_it;
1657 }
1658 if (tcp_checksum_complete(skb)) {
1659 reqsk_put(req);
1660 goto csum_error;
1661 }
1662 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1664 if (!nsk) {
1665 inet_csk_reqsk_queue_drop_and_put(sk, req);
1666 goto lookup;
1667 }
1668 sk = nsk;
1669 /* reuseport_migrate_sock() has already held one sk_refcnt
1670 * before returning.
1671 */
1672 } else {
1673 sock_hold(sk);
1674 }
1675 refcounted = true;
1676 nsk = NULL;
1677 if (!tcp_filter(sk, skb)) {
1678 th = (const struct tcphdr *)skb->data;
1679 hdr = ipv6_hdr(skb);
1680 tcp_v6_fill_cb(skb, hdr, th);
1681 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1682 } else {
1683 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1684 }
1685 if (!nsk) {
1686 reqsk_put(req);
1687 if (req_stolen) {
1688 /* Another cpu got exclusive access to req
1689 * and created a full blown socket.
1690 * Try to feed this packet to this socket
1691 * instead of discarding it.
1692 */
1693 tcp_v6_restore_cb(skb);
1694 sock_put(sk);
1695 goto lookup;
1696 }
1697 goto discard_and_relse;
1698 }
1699 nf_reset_ct(skb);
1700 if (nsk == sk) {
1701 reqsk_put(req);
1702 tcp_v6_restore_cb(skb);
1703 } else if (tcp_child_process(sk, nsk, skb)) {
1704 tcp_v6_send_reset(nsk, skb);
1705 goto discard_and_relse;
1706 } else {
1707 sock_put(sk);
1708 return 0;
1709 }
1710 }
1711
1712 if (static_branch_unlikely(&ip6_min_hopcount)) {
1713 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1714 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1715 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1716 goto discard_and_relse;
1717 }
1718 }
1719
1720 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1721 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1722 goto discard_and_relse;
1723 }
1724
1725 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1726 AF_INET6, dif, sdif);
1727 if (drop_reason)
1728 goto discard_and_relse;
1729
1730 nf_reset_ct(skb);
1731
1732 if (tcp_filter(sk, skb)) {
1733 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1734 goto discard_and_relse;
1735 }
1736 th = (const struct tcphdr *)skb->data;
1737 hdr = ipv6_hdr(skb);
1738 tcp_v6_fill_cb(skb, hdr, th);
1739
1740 skb->dev = NULL;
1741
1742 if (sk->sk_state == TCP_LISTEN) {
1743 ret = tcp_v6_do_rcv(sk, skb);
1744 goto put_and_return;
1745 }
1746
1747 sk_incoming_cpu_update(sk);
1748
1749 bh_lock_sock_nested(sk);
1750 tcp_segs_in(tcp_sk(sk), skb);
1751 ret = 0;
1752 if (!sock_owned_by_user(sk)) {
1753 ret = tcp_v6_do_rcv(sk, skb);
1754 } else {
1755 if (tcp_add_backlog(sk, skb, &drop_reason))
1756 goto discard_and_relse;
1757 }
1758 bh_unlock_sock(sk);
1759 put_and_return:
1760 if (refcounted)
1761 sock_put(sk);
1762 return ret ? -1 : 0;
1763
1764 no_tcp_socket:
1765 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1766 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1767 goto discard_it;
1768
1769 tcp_v6_fill_cb(skb, hdr, th);
1770
1771 if (tcp_checksum_complete(skb)) {
1772 csum_error:
1773 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1774 trace_tcp_bad_csum(skb);
1775 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1776 bad_packet:
1777 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1778 } else {
1779 tcp_v6_send_reset(NULL, skb);
1780 }
1781
1782 discard_it:
1783 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1784 kfree_skb_reason(skb, drop_reason);
1785 return 0;
1786
1787 discard_and_relse:
1788 sk_drops_add(sk, skb);
1789 if (refcounted)
1790 sock_put(sk);
1791 goto discard_it;
1792
1793 do_time_wait:
1794 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1795 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1796 inet_twsk_put(inet_twsk(sk));
1797 goto discard_it;
1798 }
1799
1800 tcp_v6_fill_cb(skb, hdr, th);
1801
1802 if (tcp_checksum_complete(skb)) {
1803 inet_twsk_put(inet_twsk(sk));
1804 goto csum_error;
1805 }
1806
1807 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1808 case TCP_TW_SYN:
1809 {
1810 struct sock *sk2;
1811
1812 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1813 skb, __tcp_hdrlen(th),
1814 &ipv6_hdr(skb)->saddr, th->source,
1815 &ipv6_hdr(skb)->daddr,
1816 ntohs(th->dest),
1817 tcp_v6_iif_l3_slave(skb),
1818 sdif);
1819 if (sk2) {
1820 struct inet_timewait_sock *tw = inet_twsk(sk);
1821 inet_twsk_deschedule_put(tw);
1822 sk = sk2;
1823 tcp_v6_restore_cb(skb);
1824 refcounted = false;
1825 goto process;
1826 }
1827 }
1828 /* to ACK */
1829 fallthrough;
1830 case TCP_TW_ACK:
1831 tcp_v6_timewait_ack(sk, skb);
1832 break;
1833 case TCP_TW_RST:
1834 tcp_v6_send_reset(sk, skb);
1835 inet_twsk_deschedule_put(inet_twsk(sk));
1836 goto discard_it;
1837 case TCP_TW_SUCCESS:
1838 ;
1839 }
1840 goto discard_it;
1841 }
1842
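/* Early demux: on the receive path, look up an established socket before
 * routing so that its cached dst (if still valid) can be attached to the skb.
 */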
1843 void tcp_v6_early_demux(struct sk_buff *skb)
1844 {
1845 struct net *net = dev_net(skb->dev);
1846 const struct ipv6hdr *hdr;
1847 const struct tcphdr *th;
1848 struct sock *sk;
1849
1850 if (skb->pkt_type != PACKET_HOST)
1851 return;
1852
1853 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1854 return;
1855
1856 hdr = ipv6_hdr(skb);
1857 th = tcp_hdr(skb);
1858
1859 if (th->doff < sizeof(struct tcphdr) / 4)
1860 return;
1861
1862 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1863 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1864 &hdr->saddr, th->source,
1865 &hdr->daddr, ntohs(th->dest),
1866 inet6_iif(skb), inet6_sdif(skb));
1867 if (sk) {
1868 skb->sk = sk;
1869 skb->destructor = sock_edemux;
1870 if (sk_fullsock(sk)) {
1871 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1872
1873 if (dst)
1874 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1875 if (dst &&
1876 sk->sk_rx_dst_ifindex == skb->skb_iif)
1877 skb_dst_set_noref(skb, dst);
1878 }
1879 }
1880 }
1881
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

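/* Address-family specific connection ops used by native IPv6 TCP sockets.
 * When a v6 socket is connected to an IPv4-mapped address, icsk_af_ops is
 * switched to the ipv6_mapped table below so the IPv4 output and header
 * handling paths are used instead.
 */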
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

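/* Format one full socket as a /proc/net/tcp6 row: slot, local and remote
 * address:port in hex, state, tx/rx queue sizes, pending timer kind and
 * expiry, retransmits, owning uid, probes, inode, refcount, socket pointer,
 * rto, ato, quick-ack/pingpong flags, cwnd, and either the slow-start
 * threshold or, for listeners, the fastopen queue limit.
 */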
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

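/* Format a TIME_WAIT socket for /proc/net/tcp6.  Timewait entries carry no
 * queue or uid information, so those columns are printed as zero; the timer
 * column reports kind 3 with the remaining timewait period.
 */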
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

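/* Protocol descriptor for IPPROTO_TCP on AF_INET6 sockets.  Most handlers
 * are shared with IPv4 TCP (tcp_close, tcp_sendmsg, tcp_recvmsg, ...); only
 * connect, receive/backlog, hashing and init are IPv6 specific.  Socket
 * destruction is address-family independent, hence tcp_v4_destroy_sock.
 */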
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

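/* Each network namespace gets a kernel-internal control socket
 * (net->ipv6.tcp_sk) used to emit RST and ACK segments that are not
 * associated with any user socket (see tcp_v6_send_response()).
 */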
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

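/* Module init: register the IPv6 TCP protocol handler, the SOCK_STREAM
 * protosw entry and the per-netns ops, then let MPTCP hook in.  Each step
 * is unwound in reverse order if a later one fails.
 */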
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}