1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in the TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
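/* Illustrative sketch (not from the original source): the pointer arithmetic
 * above assumes that struct tcp6_sock ends with its ipv6_pinfo member,
 * roughly
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;	(assumed to be the last member)
 *	};
 *
 * so (u8 *)sk + sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo) lands on
 * the embedded inet6 field directly, avoiding the pointer load that
 * inet6_sk(sk) would perform.
 */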
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 struct dst_entry *dst = skb_dst(skb);
106
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110 rcu_assign_pointer(sk->sk_rx_dst, dst);
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->dest,
121 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 int addr_len)
132 {
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent the BPF program called below from accessing bytes that are
135 * outside the bounds specified by the user in addr_len.
136 */
137 if (addr_len < SIN6_LEN_RFC2133)
138 return -EINVAL;
139
140 sock_owned_by_me(sk);
141
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
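/* Sketch of the bound enforced above (sizes assumed from the uapi headers,
 * shown for illustration only): SIN6_LEN_RFC2133 is the 24-byte
 * sockaddr_in6 layout without sin6_scope_id,
 *
 *	sin6_family (2) + sin6_port (2) + sin6_flowinfo (4) + sin6_addr (16) = 24
 *
 * so the check guarantees that the family, port and destination address the
 * BPF program may look at are actually present in the supplied buffer.
 */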
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 int addr_len)
147 {
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
155 struct flowi6 fl6;
156 struct dst_entry *dst;
157 int addr_type;
158 int err;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161 if (addr_len < SIN6_LEN_RFC2133)
162 return -EINVAL;
163
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
166
167 memset(&fl6, 0, sizeof(fl6));
168
169 if (np->sndflow) {
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
176 return -EINVAL;
177 fl6_sock_release(flowlabel);
178 }
179 }
180
181 /*
182 * connect() to INADDR_ANY means loopback (BSD'ism).
183 */
184
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 &usin->sin6_addr);
189 else
190 usin->sin6_addr = in6addr_loopback;
191 }
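/* Example of the BSD'ism above (illustrative): connect() to "::" on a plain
 * IPv6 socket is rewritten to "::1"; on a socket already bound to a
 * v4-mapped address it becomes "::ffff:127.0.0.1" so the connection stays on
 * the IPv4 side.
 */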
192
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195 if (addr_type & IPV6_ADDR_MULTICAST)
196 return -ENETUNREACH;
197
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
202 * must coincide.
203 */
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 return -EINVAL;
206
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
208 }
209
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
212 return -EINVAL;
213 }
214
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
220 }
221
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
224
225 /*
226 * TCP over IPv4
227 */
228
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
232
233 if (__ipv6_only_sock(sk))
234 return -ENETUNREACH;
235
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240 icsk->icsk_af_ops = &ipv6_mapped;
241 if (sk_is_mptcp(sk))
242 mptcpv6_handle_mapped(sk, true);
243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250 if (err) {
251 icsk->icsk_ext_hdr_len = exthdrlen;
252 icsk->icsk_af_ops = &ipv6_specific;
253 if (sk_is_mptcp(sk))
254 mptcpv6_handle_mapped(sk, false);
255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 goto failure;
260 }
261 np->saddr = sk->sk_v6_rcv_saddr;
262
263 return err;
264 }
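/* Illustrative example of the mapped path above: connecting a dual-stack
 * socket to "::ffff:192.0.2.1" switches icsk_af_ops to ipv6_mapped and hands
 * the rest of the work to tcp_v4_connect() with sin_addr 192.0.2.1, while an
 * IPV6_V6ONLY socket refuses such a destination with -ENETUNREACH.
 */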
265
266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 saddr = &sk->sk_v6_rcv_saddr;
268
269 fl6.flowi6_proto = IPPROTO_TCP;
270 fl6.daddr = sk->sk_v6_daddr;
271 fl6.saddr = saddr ? *saddr : np->saddr;
272 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
273 fl6.flowi6_oif = sk->sk_bound_dev_if;
274 fl6.flowi6_mark = sk->sk_mark;
275 fl6.fl6_dport = usin->sin6_port;
276 fl6.fl6_sport = inet->inet_sport;
277 fl6.flowi6_uid = sk->sk_uid;
278
279 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280 final_p = fl6_update_dst(&fl6, opt, &final);
281
282 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283
284 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
285 if (IS_ERR(dst)) {
286 err = PTR_ERR(dst);
287 goto failure;
288 }
289
290 if (!saddr) {
291 saddr = &fl6.saddr;
292 sk->sk_v6_rcv_saddr = *saddr;
293 }
294
295 /* set the source address */
296 np->saddr = *saddr;
297 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298
299 sk->sk_gso_type = SKB_GSO_TCPV6;
300 ip6_dst_store(sk, dst, NULL, NULL);
301
302 icsk->icsk_ext_hdr_len = 0;
303 if (opt)
304 icsk->icsk_ext_hdr_len = opt->opt_flen +
305 opt->opt_nflen;
306
307 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308
309 inet->inet_dport = usin->sin6_port;
310
311 tcp_set_state(sk, TCP_SYN_SENT);
312 err = inet6_hash_connect(tcp_death_row, sk);
313 if (err)
314 goto late_failure;
315
316 sk_set_txhash(sk);
317
318 if (likely(!tp->repair)) {
319 if (!tp->write_seq)
320 WRITE_ONCE(tp->write_seq,
321 secure_tcpv6_seq(np->saddr.s6_addr32,
322 sk->sk_v6_daddr.s6_addr32,
323 inet->inet_sport,
324 inet->inet_dport));
325 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 np->saddr.s6_addr32,
327 sk->sk_v6_daddr.s6_addr32);
328 }
329
330 if (tcp_fastopen_defer_connect(sk, &err))
331 return err;
332 if (err)
333 goto late_failure;
334
335 err = tcp_connect(sk);
336 if (err)
337 goto late_failure;
338
339 return 0;
340
341 late_failure:
342 tcp_set_state(sk, TCP_CLOSE);
343 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
344 inet_reset_saddr(sk);
345 failure:
346 inet->inet_dport = 0;
347 sk->sk_route_caps = 0;
348 return err;
349 }
350
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 struct dst_entry *dst;
354 u32 mtu;
355
356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 return;
358
359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360
361 /* Drop requests trying to increase our current mss.
362 * The check done in __ip6_rt_update_pmtu() is too late.
363 */
364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 return;
366
367 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (!dst)
369 return;
370
371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 tcp_sync_mss(sk, dst_mtu(dst));
373 tcp_simple_retransmit(sk);
374 }
375 }
376
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 u8 type, u8 code, int offset, __be32 info)
379 {
380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 struct net *net = dev_net(skb->dev);
383 struct request_sock *fastopen;
384 struct ipv6_pinfo *np;
385 struct tcp_sock *tp;
386 __u32 seq, snd_una;
387 struct sock *sk;
388 bool fatal;
389 int err;
390
391 sk = __inet6_lookup_established(net, &tcp_hashinfo,
392 &hdr->daddr, th->dest,
393 &hdr->saddr, ntohs(th->source),
394 skb->dev->ifindex, inet6_sdif(skb));
395
396 if (!sk) {
397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 ICMP6_MIB_INERRORS);
399 return -ENOENT;
400 }
401
402 if (sk->sk_state == TCP_TIME_WAIT) {
403 inet_twsk_put(inet_twsk(sk));
404 return 0;
405 }
406 seq = ntohl(th->seq);
407 fatal = icmpv6_err_convert(type, code, &err);
408 if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 tcp_req_err(sk, seq, fatal);
410 return 0;
411 }
412
413 bh_lock_sock(sk);
414 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416
417 if (sk->sk_state == TCP_CLOSE)
418 goto out;
419
420 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
421 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422 goto out;
423 }
424
425 tp = tcp_sk(sk);
426 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
427 fastopen = rcu_dereference(tp->fastopen_rsk);
428 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
429 if (sk->sk_state != TCP_LISTEN &&
430 !between(seq, snd_una, tp->snd_nxt)) {
431 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432 goto out;
433 }
434
435 np = tcp_inet6_sk(sk);
436
437 if (type == NDISC_REDIRECT) {
438 if (!sock_owned_by_user(sk)) {
439 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
440
441 if (dst)
442 dst->ops->redirect(dst, sk, skb);
443 }
444 goto out;
445 }
446
447 if (type == ICMPV6_PKT_TOOBIG) {
448 u32 mtu = ntohl(info);
449
450 /* We are not interested in TCP_LISTEN and open_requests
451 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
452 * they should go through unfragmented).
453 */
454 if (sk->sk_state == TCP_LISTEN)
455 goto out;
456
457 if (!ip6_sk_accept_pmtu(sk))
458 goto out;
459
460 if (mtu < IPV6_MIN_MTU)
461 goto out;
462
463 WRITE_ONCE(tp->mtu_info, mtu);
464
465 if (!sock_owned_by_user(sk))
466 tcp_v6_mtu_reduced(sk);
467 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
468 &sk->sk_tsq_flags))
469 sock_hold(sk);
470 goto out;
471 }
472
473
474 /* Might be for a request_sock */
475 switch (sk->sk_state) {
476 case TCP_SYN_SENT:
477 case TCP_SYN_RECV:
478 /* Only in fast or simultaneous open. If a fast open socket is
479 * already accepted it is treated as a connected one below.
480 */
481 if (fastopen && !fastopen->sk)
482 break;
483
484 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
485
486 if (!sock_owned_by_user(sk)) {
487 sk->sk_err = err;
488 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
489
490 tcp_done(sk);
491 } else
492 sk->sk_err_soft = err;
493 goto out;
494 case TCP_LISTEN:
495 break;
496 default:
497 /* check if this ICMP message allows revert of backoff.
498 * (see RFC 6069)
499 */
500 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 code == ICMPV6_NOROUTE)
502 tcp_ld_RTO_revert(sk, seq);
503 }
504
505 if (!sock_owned_by_user(sk) && np->recverr) {
506 sk->sk_err = err;
507 sk->sk_error_report(sk);
508 } else
509 sk->sk_err_soft = err;
510
511 out:
512 bh_unlock_sock(sk);
513 sock_put(sk);
514 return 0;
515 }
516
517
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 struct flowi *fl,
520 struct request_sock *req,
521 struct tcp_fastopen_cookie *foc,
522 enum tcp_synack_type synack_type,
523 struct sk_buff *syn_skb)
524 {
525 struct inet_request_sock *ireq = inet_rsk(req);
526 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 struct ipv6_txoptions *opt;
528 struct flowi6 *fl6 = &fl->u.ip6;
529 struct sk_buff *skb;
530 int err = -ENOMEM;
531 u8 tclass;
532
533 /* First, grab a route. */
534 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 IPPROTO_TCP)) == NULL)
536 goto done;
537
538 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539
540 if (skb) {
541 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 &ireq->ir_v6_rmt_addr);
543
544 fl6->daddr = ireq->ir_v6_rmt_addr;
545 if (np->repflow && ireq->pktopts)
546 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547
548 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 (np->tclass & INET_ECN_MASK) :
551 np->tclass;
552
553 if (!INET_ECN_is_capable(tclass) &&
554 tcp_bpf_ca_needs_ecn((struct sock *)req))
555 tclass |= INET_ECN_ECT_0;
556
557 rcu_read_lock();
558 opt = ireq->ipv6_opt;
559 if (!opt)
560 opt = rcu_dereference(np->opt);
561 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
562 tclass, sk->sk_priority);
563 rcu_read_unlock();
564 err = net_xmit_eval(err);
565 }
566
567 done:
568 return err;
569 }
570
571
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 kfree(inet_rsk(req)->ipv6_opt);
575 kfree_skb(inet_rsk(req)->pktopts);
576 }
577
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 const struct in6_addr *addr,
581 int l3index)
582 {
583 return tcp_md5_do_lookup(sk, l3index,
584 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 const struct sock *addr_sk)
589 {
590 int l3index;
591
592 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 addr_sk->sk_bound_dev_if);
594 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 l3index);
596 }
597
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 sockptr_t optval, int optlen)
600 {
601 struct tcp_md5sig cmd;
602 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 int l3index = 0;
604 u8 prefixlen;
605
606 if (optlen < sizeof(cmd))
607 return -EINVAL;
608
609 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
610 return -EFAULT;
611
612 if (sin6->sin6_family != AF_INET6)
613 return -EINVAL;
614
615 if (optname == TCP_MD5SIG_EXT &&
616 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
617 prefixlen = cmd.tcpm_prefixlen;
618 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
619 prefixlen > 32))
620 return -EINVAL;
621 } else {
622 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
623 }
624
625 if (optname == TCP_MD5SIG_EXT &&
626 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
627 struct net_device *dev;
628
629 rcu_read_lock();
630 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
631 if (dev && netif_is_l3_master(dev))
632 l3index = dev->ifindex;
633 rcu_read_unlock();
634
635 /* ok to reference set/not set outside of rcu;
636 * right now device MUST be an L3 master
637 */
638 if (!dev || !l3index)
639 return -EINVAL;
640 }
641
642 if (!cmd.tcpm_keylen) {
643 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
644 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
645 AF_INET, prefixlen,
646 l3index);
647 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
648 AF_INET6, prefixlen, l3index);
649 }
650
651 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
652 return -EINVAL;
653
654 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
655 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
656 AF_INET, prefixlen, l3index,
657 cmd.tcpm_key, cmd.tcpm_keylen,
658 GFP_KERNEL);
659
660 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661 AF_INET6, prefixlen, l3index,
662 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
663 }
664
665 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
666 const struct in6_addr *daddr,
667 const struct in6_addr *saddr,
668 const struct tcphdr *th, int nbytes)
669 {
670 struct tcp6_pseudohdr *bp;
671 struct scatterlist sg;
672 struct tcphdr *_th;
673
674 bp = hp->scratch;
675 /* 1. TCP pseudo-header (RFC2460) */
676 bp->saddr = *saddr;
677 bp->daddr = *daddr;
678 bp->protocol = cpu_to_be32(IPPROTO_TCP);
679 bp->len = cpu_to_be32(nbytes);
680
681 _th = (struct tcphdr *)(bp + 1);
682 memcpy(_th, th, sizeof(*th));
683 _th->check = 0;
684
685 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
686 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
687 sizeof(*bp) + sizeof(*th));
688 return crypto_ahash_update(hp->md5_req);
689 }
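/* Sketch of what ends up in hp->scratch above (layout of struct
 * tcp6_pseudohdr assumed from include/net/tcp.h, shown for illustration):
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		(segment length, cpu_to_be32(nbytes))
 *		__be32		protocol;	(IPPROTO_TCP)
 *	};
 *
 * followed by a copy of the TCP header with its checksum zeroed; both are fed
 * to the MD5 transform through a single scatterlist entry.
 */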
690
691 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
692 const struct in6_addr *daddr, struct in6_addr *saddr,
693 const struct tcphdr *th)
694 {
695 struct tcp_md5sig_pool *hp;
696 struct ahash_request *req;
697
698 hp = tcp_get_md5sig_pool();
699 if (!hp)
700 goto clear_hash_noput;
701 req = hp->md5_req;
702
703 if (crypto_ahash_init(req))
704 goto clear_hash;
705 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
706 goto clear_hash;
707 if (tcp_md5_hash_key(hp, key))
708 goto clear_hash;
709 ahash_request_set_crypt(req, NULL, md5_hash, 0);
710 if (crypto_ahash_final(req))
711 goto clear_hash;
712
713 tcp_put_md5sig_pool();
714 return 0;
715
716 clear_hash:
717 tcp_put_md5sig_pool();
718 clear_hash_noput:
719 memset(md5_hash, 0, 16);
720 return 1;
721 }
722
723 static int tcp_v6_md5_hash_skb(char *md5_hash,
724 const struct tcp_md5sig_key *key,
725 const struct sock *sk,
726 const struct sk_buff *skb)
727 {
728 const struct in6_addr *saddr, *daddr;
729 struct tcp_md5sig_pool *hp;
730 struct ahash_request *req;
731 const struct tcphdr *th = tcp_hdr(skb);
732
733 if (sk) { /* valid for establish/request sockets */
734 saddr = &sk->sk_v6_rcv_saddr;
735 daddr = &sk->sk_v6_daddr;
736 } else {
737 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
738 saddr = &ip6h->saddr;
739 daddr = &ip6h->daddr;
740 }
741
742 hp = tcp_get_md5sig_pool();
743 if (!hp)
744 goto clear_hash_noput;
745 req = hp->md5_req;
746
747 if (crypto_ahash_init(req))
748 goto clear_hash;
749
750 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
751 goto clear_hash;
752 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
753 goto clear_hash;
754 if (tcp_md5_hash_key(hp, key))
755 goto clear_hash;
756 ahash_request_set_crypt(req, NULL, md5_hash, 0);
757 if (crypto_ahash_final(req))
758 goto clear_hash;
759
760 tcp_put_md5sig_pool();
761 return 0;
762
763 clear_hash:
764 tcp_put_md5sig_pool();
765 clear_hash_noput:
766 memset(md5_hash, 0, 16);
767 return 1;
768 }
769
770 #endif
771
772 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
773 const struct sk_buff *skb,
774 int dif, int sdif)
775 {
776 #ifdef CONFIG_TCP_MD5SIG
777 const __u8 *hash_location = NULL;
778 struct tcp_md5sig_key *hash_expected;
779 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
780 const struct tcphdr *th = tcp_hdr(skb);
781 int genhash, l3index;
782 u8 newhash[16];
783
784 /* sdif set, means packet ingressed via a device
785 * in an L3 domain and dif is set to the l3mdev
786 */
787 l3index = sdif ? dif : 0;
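/* Worked example (illustrative): a segment arriving on eth1 that is enslaved
 * to a VRF device with ifindex 4 reaches here with sdif == ifindex(eth1) and
 * dif == 4, so l3index == 4 and the MD5 key lookup is scoped to that VRF;
 * on a device outside any L3 domain sdif is 0 and l3index stays 0.
 */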
788
789 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
790 hash_location = tcp_parse_md5sig_option(th);
791
792 /* We've parsed the options - do we have a hash? */
793 if (!hash_expected && !hash_location)
794 return false;
795
796 if (hash_expected && !hash_location) {
797 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798 return true;
799 }
800
801 if (!hash_expected && hash_location) {
802 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803 return true;
804 }
805
806 /* check the signature */
807 genhash = tcp_v6_md5_hash_skb(newhash,
808 hash_expected,
809 NULL, skb);
810
811 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
812 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
813 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
814 genhash ? "failed" : "mismatch",
815 &ip6h->saddr, ntohs(th->source),
816 &ip6h->daddr, ntohs(th->dest), l3index);
817 return true;
818 }
819 #endif
820 return false;
821 }
822
823 static void tcp_v6_init_req(struct request_sock *req,
824 const struct sock *sk_listener,
825 struct sk_buff *skb)
826 {
827 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
828 struct inet_request_sock *ireq = inet_rsk(req);
829 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
830
831 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
832 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
833
834 /* So that link locals have meaning */
835 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
836 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
837 ireq->ir_iif = tcp_v6_iif(skb);
838
839 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
840 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
841 np->rxopt.bits.rxinfo ||
842 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
843 np->rxopt.bits.rxohlim || np->repflow)) {
844 refcount_inc(&skb->users);
845 ireq->pktopts = skb;
846 }
847 }
848
849 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
850 struct flowi *fl,
851 const struct request_sock *req)
852 {
853 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
854 }
855
856 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
857 .family = AF_INET6,
858 .obj_size = sizeof(struct tcp6_request_sock),
859 .rtx_syn_ack = tcp_rtx_synack,
860 .send_ack = tcp_v6_reqsk_send_ack,
861 .destructor = tcp_v6_reqsk_destructor,
862 .send_reset = tcp_v6_send_reset,
863 .syn_ack_timeout = tcp_syn_ack_timeout,
864 };
865
866 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
867 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
868 sizeof(struct ipv6hdr),
869 #ifdef CONFIG_TCP_MD5SIG
870 .req_md5_lookup = tcp_v6_md5_lookup,
871 .calc_md5_hash = tcp_v6_md5_hash_skb,
872 #endif
873 .init_req = tcp_v6_init_req,
874 #ifdef CONFIG_SYN_COOKIES
875 .cookie_init_seq = cookie_v6_init_sequence,
876 #endif
877 .route_req = tcp_v6_route_req,
878 .init_seq = tcp_v6_init_seq,
879 .init_ts_off = tcp_v6_init_ts_off,
880 .send_synack = tcp_v6_send_synack,
881 };
882
883 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
884 u32 ack, u32 win, u32 tsval, u32 tsecr,
885 int oif, struct tcp_md5sig_key *key, int rst,
886 u8 tclass, __be32 label, u32 priority)
887 {
888 const struct tcphdr *th = tcp_hdr(skb);
889 struct tcphdr *t1;
890 struct sk_buff *buff;
891 struct flowi6 fl6;
892 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
893 struct sock *ctl_sk = net->ipv6.tcp_sk;
894 unsigned int tot_len = sizeof(struct tcphdr);
895 struct dst_entry *dst;
896 __be32 *topt;
897 __u32 mark = 0;
898
899 if (tsecr)
900 tot_len += TCPOLEN_TSTAMP_ALIGNED;
901 #ifdef CONFIG_TCP_MD5SIG
902 if (key)
903 tot_len += TCPOLEN_MD5SIG_ALIGNED;
904 #endif
905
906 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
907 GFP_ATOMIC);
908 if (!buff)
909 return;
910
911 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
912
913 t1 = skb_push(buff, tot_len);
914 skb_reset_transport_header(buff);
915
916 /* Swap the send and the receive. */
917 memset(t1, 0, sizeof(*t1));
918 t1->dest = th->source;
919 t1->source = th->dest;
920 t1->doff = tot_len / 4;
921 t1->seq = htonl(seq);
922 t1->ack_seq = htonl(ack);
923 t1->ack = !rst || !th->ack;
924 t1->rst = rst;
925 t1->window = htons(win);
926
927 topt = (__be32 *)(t1 + 1);
928
929 if (tsecr) {
930 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
931 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
932 *topt++ = htonl(tsval);
933 *topt++ = htonl(tsecr);
934 }
935
936 #ifdef CONFIG_TCP_MD5SIG
937 if (key) {
938 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
939 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
940 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
941 &ipv6_hdr(skb)->saddr,
942 &ipv6_hdr(skb)->daddr, t1);
943 }
944 #endif
945
946 memset(&fl6, 0, sizeof(fl6));
947 fl6.daddr = ipv6_hdr(skb)->saddr;
948 fl6.saddr = ipv6_hdr(skb)->daddr;
949 fl6.flowlabel = label;
950
951 buff->ip_summed = CHECKSUM_PARTIAL;
952 buff->csum = 0;
953
954 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
955
956 fl6.flowi6_proto = IPPROTO_TCP;
957 if (rt6_need_strict(&fl6.daddr) && !oif)
958 fl6.flowi6_oif = tcp_v6_iif(skb);
959 else {
960 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
961 oif = skb->skb_iif;
962
963 fl6.flowi6_oif = oif;
964 }
965
966 if (sk) {
967 if (sk->sk_state == TCP_TIME_WAIT) {
968 mark = inet_twsk(sk)->tw_mark;
969 /* autoflowlabel relies on buff->hash */
970 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
971 PKT_HASH_TYPE_L4);
972 } else {
973 mark = sk->sk_mark;
974 }
975 buff->tstamp = tcp_transmit_time(sk);
976 }
977 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
978 fl6.fl6_dport = t1->dest;
979 fl6.fl6_sport = t1->source;
980 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
981 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
982
983 /* Pass a socket to ip6_dst_lookup_flow() even when it is for a RST;
984 * the underlying function will use it to retrieve the network
985 * namespace.
986 */
987 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
988 if (!IS_ERR(dst)) {
989 skb_dst_set(buff, dst);
990 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
991 tclass & ~INET_ECN_MASK, priority);
992 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
993 if (rst)
994 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
995 return;
996 }
997
998 kfree_skb(buff);
999 }
1000
1001 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1002 {
1003 const struct tcphdr *th = tcp_hdr(skb);
1004 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1005 u32 seq = 0, ack_seq = 0;
1006 struct tcp_md5sig_key *key = NULL;
1007 #ifdef CONFIG_TCP_MD5SIG
1008 const __u8 *hash_location = NULL;
1009 unsigned char newhash[16];
1010 int genhash;
1011 struct sock *sk1 = NULL;
1012 #endif
1013 __be32 label = 0;
1014 u32 priority = 0;
1015 struct net *net;
1016 int oif = 0;
1017
1018 if (th->rst)
1019 return;
1020
1021 /* If sk is not NULL, it means we did a successful lookup and the incoming
1022 * route had to be correct. prequeue might have dropped our dst.
1023 */
1024 if (!sk && !ipv6_unicast_destination(skb))
1025 return;
1026
1027 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1028 #ifdef CONFIG_TCP_MD5SIG
1029 rcu_read_lock();
1030 hash_location = tcp_parse_md5sig_option(th);
1031 if (sk && sk_fullsock(sk)) {
1032 int l3index;
1033
1034 /* sdif set, means packet ingressed via a device
1035 * in an L3 domain and inet_iif is set to it.
1036 */
1037 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1038 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1039 } else if (hash_location) {
1040 int dif = tcp_v6_iif_l3_slave(skb);
1041 int sdif = tcp_v6_sdif(skb);
1042 int l3index;
1043
1044 /*
1045 * The active side is lost. Try to find the listening socket through
1046 * the source port, and then find the md5 key through that listening socket.
1047 * We do not loosen security here:
1048 * the incoming packet is checked against the md5 hash of the found key,
1049 * and no RST is generated if the md5 hash doesn't match.
1050 */
1051 sk1 = inet6_lookup_listener(net,
1052 &tcp_hashinfo, NULL, 0,
1053 &ipv6h->saddr,
1054 th->source, &ipv6h->daddr,
1055 ntohs(th->source), dif, sdif);
1056 if (!sk1)
1057 goto out;
1058
1059 /* sdif set, means packet ingressed via a device
1060 * in an L3 domain and dif is set to it.
1061 */
1062 l3index = tcp_v6_sdif(skb) ? dif : 0;
1063
1064 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1065 if (!key)
1066 goto out;
1067
1068 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1069 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1070 goto out;
1071 }
1072 #endif
1073
1074 if (th->ack)
1075 seq = ntohl(th->ack_seq);
1076 else
1077 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1078 (th->doff << 2);
1079
1080 if (sk) {
1081 oif = sk->sk_bound_dev_if;
1082 if (sk_fullsock(sk)) {
1083 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1084
1085 trace_tcp_send_reset(sk, skb);
1086 if (np->repflow)
1087 label = ip6_flowlabel(ipv6h);
1088 priority = sk->sk_priority;
1089 }
1090 if (sk->sk_state == TCP_TIME_WAIT) {
1091 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1092 priority = inet_twsk(sk)->tw_priority;
1093 }
1094 } else {
1095 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1096 label = ip6_flowlabel(ipv6h);
1097 }
1098
1099 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1100 ipv6_get_dsfield(ipv6h), label, priority);
1101
1102 #ifdef CONFIG_TCP_MD5SIG
1103 out:
1104 rcu_read_unlock();
1105 #endif
1106 }
1107
1108 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1109 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1110 struct tcp_md5sig_key *key, u8 tclass,
1111 __be32 label, u32 priority)
1112 {
1113 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1114 tclass, label, priority);
1115 }
1116
1117 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1118 {
1119 struct inet_timewait_sock *tw = inet_twsk(sk);
1120 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1121
1122 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1123 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1124 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1125 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1126 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1127
1128 inet_twsk_put(tw);
1129 }
1130
1131 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1132 struct request_sock *req)
1133 {
1134 int l3index;
1135
1136 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1137
1138 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1139 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1140 */
1141 /* RFC 7323 2.3
1142 * The window field (SEG.WND) of every outgoing segment, with the
1143 * exception of <SYN> segments, MUST be right-shifted by
1144 * Rcv.Wind.Shift bits:
1145 */
1146 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1147 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1148 tcp_rsk(req)->rcv_nxt,
1149 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1150 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1151 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1152 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1153 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1154 }
1155
1156
1157 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1158 {
1159 #ifdef CONFIG_SYN_COOKIES
1160 const struct tcphdr *th = tcp_hdr(skb);
1161
1162 if (!th->syn)
1163 sk = cookie_v6_check(sk, skb);
1164 #endif
1165 return sk;
1166 }
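/* Note (illustrative): cookie_v6_check() is only consulted for non-SYN
 * segments, i.e. the final ACK of a handshake whose SYN was answered with a
 * syncookie after the request queue overflowed; a SYN itself falls through
 * unchanged and is handled by the normal request path.
 */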
1167
1168 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1169 struct tcphdr *th, u32 *cookie)
1170 {
1171 u16 mss = 0;
1172 #ifdef CONFIG_SYN_COOKIES
1173 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1174 &tcp_request_sock_ipv6_ops, sk, th);
1175 if (mss) {
1176 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1177 tcp_synq_overflow(sk);
1178 }
1179 #endif
1180 return mss;
1181 }
1182
1183 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1184 {
1185 if (skb->protocol == htons(ETH_P_IP))
1186 return tcp_v4_conn_request(sk, skb);
1187
1188 if (!ipv6_unicast_destination(skb))
1189 goto drop;
1190
1191 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1192 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1193 return 0;
1194 }
1195
1196 return tcp_conn_request(&tcp6_request_sock_ops,
1197 &tcp_request_sock_ipv6_ops, sk, skb);
1198
1199 drop:
1200 tcp_listendrop(sk);
1201 return 0; /* don't send reset */
1202 }
1203
1204 static void tcp_v6_restore_cb(struct sk_buff *skb)
1205 {
1206 /* We need to move header back to the beginning if xfrm6_policy_check()
1207 * and tcp_v6_fill_cb() are going to be called again.
1208 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1209 */
1210 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1211 sizeof(struct inet6_skb_parm));
1212 }
1213
1214 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1215 struct request_sock *req,
1216 struct dst_entry *dst,
1217 struct request_sock *req_unhash,
1218 bool *own_req)
1219 {
1220 struct inet_request_sock *ireq;
1221 struct ipv6_pinfo *newnp;
1222 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1223 struct ipv6_txoptions *opt;
1224 struct inet_sock *newinet;
1225 bool found_dup_sk = false;
1226 struct tcp_sock *newtp;
1227 struct sock *newsk;
1228 #ifdef CONFIG_TCP_MD5SIG
1229 struct tcp_md5sig_key *key;
1230 int l3index;
1231 #endif
1232 struct flowi6 fl6;
1233
1234 if (skb->protocol == htons(ETH_P_IP)) {
1235 /*
1236 * v6 mapped
1237 */
1238
1239 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1240 req_unhash, own_req);
1241
1242 if (!newsk)
1243 return NULL;
1244
1245 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1246
1247 newinet = inet_sk(newsk);
1248 newnp = tcp_inet6_sk(newsk);
1249 newtp = tcp_sk(newsk);
1250
1251 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1252
1253 newnp->saddr = newsk->sk_v6_rcv_saddr;
1254
1255 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1256 if (sk_is_mptcp(newsk))
1257 mptcpv6_handle_mapped(newsk, true);
1258 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1259 #ifdef CONFIG_TCP_MD5SIG
1260 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1261 #endif
1262
1263 newnp->ipv6_mc_list = NULL;
1264 newnp->ipv6_ac_list = NULL;
1265 newnp->ipv6_fl_list = NULL;
1266 newnp->pktoptions = NULL;
1267 newnp->opt = NULL;
1268 newnp->mcast_oif = inet_iif(skb);
1269 newnp->mcast_hops = ip_hdr(skb)->ttl;
1270 newnp->rcv_flowinfo = 0;
1271 if (np->repflow)
1272 newnp->flow_label = 0;
1273
1274 /*
1275 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1276 * here, tcp_create_openreq_child now does this for us, see the comment in
1277 * that function for the gory details. -acme
1278 */
1279
1280 /* It is a tricky place. Until this moment the IPv4 tcp code
1281 worked with the IPv6 icsk.icsk_af_ops.
1282 Sync it now.
1283 */
1284 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1285
1286 return newsk;
1287 }
1288
1289 ireq = inet_rsk(req);
1290
1291 if (sk_acceptq_is_full(sk))
1292 goto out_overflow;
1293
1294 if (!dst) {
1295 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1296 if (!dst)
1297 goto out;
1298 }
1299
1300 newsk = tcp_create_openreq_child(sk, req, skb);
1301 if (!newsk)
1302 goto out_nonewsk;
1303
1304 /*
1305 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1306 * count here, tcp_create_openreq_child now does this for us, see the
1307 * comment in that function for the gory details. -acme
1308 */
1309
1310 newsk->sk_gso_type = SKB_GSO_TCPV6;
1311 ip6_dst_store(newsk, dst, NULL, NULL);
1312 inet6_sk_rx_dst_set(newsk, skb);
1313
1314 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1315
1316 newtp = tcp_sk(newsk);
1317 newinet = inet_sk(newsk);
1318 newnp = tcp_inet6_sk(newsk);
1319
1320 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1321
1322 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1323 newnp->saddr = ireq->ir_v6_loc_addr;
1324 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1325 newsk->sk_bound_dev_if = ireq->ir_iif;
1326
1327 /* Now IPv6 options...
1328
1329 First: no IPv4 options.
1330 */
1331 newinet->inet_opt = NULL;
1332 newnp->ipv6_mc_list = NULL;
1333 newnp->ipv6_ac_list = NULL;
1334 newnp->ipv6_fl_list = NULL;
1335
1336 /* Clone RX bits */
1337 newnp->rxopt.all = np->rxopt.all;
1338
1339 newnp->pktoptions = NULL;
1340 newnp->opt = NULL;
1341 newnp->mcast_oif = tcp_v6_iif(skb);
1342 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1343 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1344 if (np->repflow)
1345 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1346
1347 /* Set ToS of the new socket based upon the value of incoming SYN.
1348 * ECT bits are set later in tcp_init_transfer().
1349 */
1350 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1351 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1352
1353 /* Clone native IPv6 options from listening socket (if any)
1354
1355 Yes, keeping a reference count would be much more clever,
1356 but we do one more thing here: reattach optmem
1357 to newsk.
1358 */
1359 opt = ireq->ipv6_opt;
1360 if (!opt)
1361 opt = rcu_dereference(np->opt);
1362 if (opt) {
1363 opt = ipv6_dup_options(newsk, opt);
1364 RCU_INIT_POINTER(newnp->opt, opt);
1365 }
1366 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1367 if (opt)
1368 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1369 opt->opt_flen;
1370
1371 tcp_ca_openreq_child(newsk, dst);
1372
1373 tcp_sync_mss(newsk, dst_mtu(dst));
1374 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1375
1376 tcp_initialize_rcv_mss(newsk);
1377
1378 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1379 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1380
1381 #ifdef CONFIG_TCP_MD5SIG
1382 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1383
1384 /* Copy over the MD5 key from the original socket */
1385 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1386 if (key) {
1387 /* We're using one, so create a matching key
1388 * on the newsk structure. If we fail to get
1389 * memory, then we end up not copying the key
1390 * across. Shucks.
1391 */
1392 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1393 AF_INET6, 128, l3index, key->key, key->keylen,
1394 sk_gfp_mask(sk, GFP_ATOMIC));
1395 }
1396 #endif
1397
1398 if (__inet_inherit_port(sk, newsk) < 0) {
1399 inet_csk_prepare_forced_close(newsk);
1400 tcp_done(newsk);
1401 goto out;
1402 }
1403 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1404 &found_dup_sk);
1405 if (*own_req) {
1406 tcp_move_syn(newtp, req);
1407
1408 /* Clone pktoptions received with SYN, if we own the req */
1409 if (ireq->pktopts) {
1410 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1411 consume_skb(ireq->pktopts);
1412 ireq->pktopts = NULL;
1413 if (newnp->pktoptions)
1414 tcp_v6_restore_cb(newnp->pktoptions);
1415 }
1416 } else {
1417 if (!req_unhash && found_dup_sk) {
1418 /* This code path should only be executed in the
1419 * syncookie case
1420 */
1421 bh_unlock_sock(newsk);
1422 sock_put(newsk);
1423 newsk = NULL;
1424 }
1425 }
1426
1427 return newsk;
1428
1429 out_overflow:
1430 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1431 out_nonewsk:
1432 dst_release(dst);
1433 out:
1434 tcp_listendrop(sk);
1435 return NULL;
1436 }
1437
1438 /* The socket must have its spinlock held when we get
1439 * here, unless it is a TCP_LISTEN socket.
1440 *
1441 * We have a potential double-lock case here, so even when
1442 * doing backlog processing we use the BH locking scheme.
1443 * This is because we cannot sleep with the original spinlock
1444 * held.
1445 */
1446 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1447 {
1448 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1449 struct sk_buff *opt_skb = NULL;
1450 struct tcp_sock *tp;
1451
1452 /* Imagine: socket is IPv6. IPv4 packet arrives,
1453 goes to the IPv4 receive handler and is backlogged.
1454 From the backlog it always goes here. Kerboom...
1455 Fortunately, tcp_rcv_established and rcv_established
1456 handle them correctly, but that is not the case with
1457 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1458 */
1459
1460 if (skb->protocol == htons(ETH_P_IP))
1461 return tcp_v4_do_rcv(sk, skb);
1462
1463 /*
1464 * socket locking is here for SMP purposes as backlog rcv
1465 * is currently called with bh processing disabled.
1466 */
1467
1468 /* Do Stevens' IPV6_PKTOPTIONS.
1469
1470 Yes, guys, it is the only place in our code where we
1471 may do it without affecting IPv4.
1472 The rest of the code is protocol independent,
1473 and I do not like the idea of uglifying IPv4.
1474
1475 Actually, the whole idea behind IPV6_PKTOPTIONS
1476 does not look very well thought out. For now we latch
1477 the options received in the last packet enqueued
1478 by tcp. Feel free to propose a better solution.
1479 --ANK (980728)
1480 */
1481 if (np->rxopt.all)
1482 opt_skb = skb_clone_and_charge_r(skb, sk);
1483
1484 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1485 struct dst_entry *dst;
1486
1487 dst = rcu_dereference_protected(sk->sk_rx_dst,
1488 lockdep_sock_is_held(sk));
1489
1490 sock_rps_save_rxhash(sk, skb);
1491 sk_mark_napi_id(sk, skb);
1492 if (dst) {
1493 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1494 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1495 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1496 dst_release(dst);
1497 }
1498 }
1499
1500 tcp_rcv_established(sk, skb);
1501 if (opt_skb)
1502 goto ipv6_pktoptions;
1503 return 0;
1504 }
1505
1506 if (tcp_checksum_complete(skb))
1507 goto csum_err;
1508
1509 if (sk->sk_state == TCP_LISTEN) {
1510 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1511
1512 if (!nsk)
1513 goto discard;
1514
1515 if (nsk != sk) {
1516 if (tcp_child_process(sk, nsk, skb))
1517 goto reset;
1518 if (opt_skb)
1519 __kfree_skb(opt_skb);
1520 return 0;
1521 }
1522 } else
1523 sock_rps_save_rxhash(sk, skb);
1524
1525 if (tcp_rcv_state_process(sk, skb))
1526 goto reset;
1527 if (opt_skb)
1528 goto ipv6_pktoptions;
1529 return 0;
1530
1531 reset:
1532 tcp_v6_send_reset(sk, skb);
1533 discard:
1534 if (opt_skb)
1535 __kfree_skb(opt_skb);
1536 kfree_skb(skb);
1537 return 0;
1538 csum_err:
1539 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1540 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1541 goto discard;
1542
1543
1544 ipv6_pktoptions:
1545 /* Do you ask, what is it?
1546
1547 1. skb was enqueued by tcp.
1548 2. skb is added to the tail of the read queue, rather than out of order.
1549 3. the socket is not in a passive state.
1550 4. Finally, it really contains options which the user wants to receive.
1551 */
1552 tp = tcp_sk(sk);
1553 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1554 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1555 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1556 np->mcast_oif = tcp_v6_iif(opt_skb);
1557 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1558 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1559 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1560 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1561 if (np->repflow)
1562 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1563 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1564 tcp_v6_restore_cb(opt_skb);
1565 opt_skb = xchg(&np->pktoptions, opt_skb);
1566 } else {
1567 __kfree_skb(opt_skb);
1568 opt_skb = xchg(&np->pktoptions, NULL);
1569 }
1570 }
1571
1572 kfree_skb(opt_skb);
1573 return 0;
1574 }
1575
1576 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1577 const struct tcphdr *th)
1578 {
1579 /* This is tricky: we move IP6CB at its correct location into
1580 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1581 * _decode_session6() uses IP6CB().
1582 * barrier() makes sure compiler won't play aliasing games.
1583 */
1584 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1585 sizeof(struct inet6_skb_parm));
1586 barrier();
1587
1588 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1589 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1590 skb->len - th->doff*4);
1591 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1592 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1593 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1594 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1595 TCP_SKB_CB(skb)->sacked = 0;
1596 TCP_SKB_CB(skb)->has_rxtstamp =
1597 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1598 }
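/* Worked example (illustrative): a segment with seq 1000, a 20-byte header
 * (doff 5), 100 bytes of payload and FIN set gives
 * end_seq = 1000 + 0 + 1 + (120 - 20) = 1101, the sequence number just past
 * the last data byte plus the FIN.
 */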
1599
1600 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1601 {
1602 struct sk_buff *skb_to_free;
1603 int sdif = inet6_sdif(skb);
1604 int dif = inet6_iif(skb);
1605 const struct tcphdr *th;
1606 const struct ipv6hdr *hdr;
1607 bool refcounted;
1608 struct sock *sk;
1609 int ret;
1610 struct net *net = dev_net(skb->dev);
1611
1612 if (skb->pkt_type != PACKET_HOST)
1613 goto discard_it;
1614
1615 /*
1616 * Count it even if it's bad.
1617 */
1618 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1619
1620 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1621 goto discard_it;
1622
1623 th = (const struct tcphdr *)skb->data;
1624
1625 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1626 goto bad_packet;
1627 if (!pskb_may_pull(skb, th->doff*4))
1628 goto discard_it;
1629
1630 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1631 goto csum_error;
1632
1633 th = (const struct tcphdr *)skb->data;
1634 hdr = ipv6_hdr(skb);
1635
1636 lookup:
1637 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1638 th->source, th->dest, inet6_iif(skb), sdif,
1639 &refcounted);
1640 if (!sk)
1641 goto no_tcp_socket;
1642
1643 process:
1644 if (sk->sk_state == TCP_TIME_WAIT)
1645 goto do_time_wait;
1646
1647 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1648 struct request_sock *req = inet_reqsk(sk);
1649 bool req_stolen = false;
1650 struct sock *nsk;
1651
1652 sk = req->rsk_listener;
1653 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1654 sk_drops_add(sk, skb);
1655 reqsk_put(req);
1656 goto discard_it;
1657 }
1658 if (tcp_checksum_complete(skb)) {
1659 reqsk_put(req);
1660 goto csum_error;
1661 }
1662 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663 inet_csk_reqsk_queue_drop_and_put(sk, req);
1664 goto lookup;
1665 }
1666 sock_hold(sk);
1667 refcounted = true;
1668 nsk = NULL;
1669 if (!tcp_filter(sk, skb)) {
1670 th = (const struct tcphdr *)skb->data;
1671 hdr = ipv6_hdr(skb);
1672 tcp_v6_fill_cb(skb, hdr, th);
1673 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1674 }
1675 if (!nsk) {
1676 reqsk_put(req);
1677 if (req_stolen) {
1678 /* Another cpu got exclusive access to req
1679 * and created a full blown socket.
1680 * Try to feed this packet to this socket
1681 * instead of discarding it.
1682 */
1683 tcp_v6_restore_cb(skb);
1684 sock_put(sk);
1685 goto lookup;
1686 }
1687 goto discard_and_relse;
1688 }
1689 if (nsk == sk) {
1690 reqsk_put(req);
1691 tcp_v6_restore_cb(skb);
1692 } else if (tcp_child_process(sk, nsk, skb)) {
1693 tcp_v6_send_reset(nsk, skb);
1694 goto discard_and_relse;
1695 } else {
1696 sock_put(sk);
1697 return 0;
1698 }
1699 }
1700 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1701 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1702 goto discard_and_relse;
1703 }
1704
1705 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1706 goto discard_and_relse;
1707
1708 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1709 goto discard_and_relse;
1710
1711 if (tcp_filter(sk, skb))
1712 goto discard_and_relse;
1713 th = (const struct tcphdr *)skb->data;
1714 hdr = ipv6_hdr(skb);
1715 tcp_v6_fill_cb(skb, hdr, th);
1716
1717 skb->dev = NULL;
1718
1719 if (sk->sk_state == TCP_LISTEN) {
1720 ret = tcp_v6_do_rcv(sk, skb);
1721 goto put_and_return;
1722 }
1723
1724 sk_incoming_cpu_update(sk);
1725
1726 bh_lock_sock_nested(sk);
1727 tcp_segs_in(tcp_sk(sk), skb);
1728 ret = 0;
1729 if (!sock_owned_by_user(sk)) {
1730 skb_to_free = sk->sk_rx_skb_cache;
1731 sk->sk_rx_skb_cache = NULL;
1732 ret = tcp_v6_do_rcv(sk, skb);
1733 } else {
1734 if (tcp_add_backlog(sk, skb))
1735 goto discard_and_relse;
1736 skb_to_free = NULL;
1737 }
1738 bh_unlock_sock(sk);
1739 if (skb_to_free)
1740 __kfree_skb(skb_to_free);
1741 put_and_return:
1742 if (refcounted)
1743 sock_put(sk);
1744 return ret ? -1 : 0;
1745
1746 no_tcp_socket:
1747 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1748 goto discard_it;
1749
1750 tcp_v6_fill_cb(skb, hdr, th);
1751
1752 if (tcp_checksum_complete(skb)) {
1753 csum_error:
1754 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1755 bad_packet:
1756 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1757 } else {
1758 tcp_v6_send_reset(NULL, skb);
1759 }
1760
1761 discard_it:
1762 kfree_skb(skb);
1763 return 0;
1764
1765 discard_and_relse:
1766 sk_drops_add(sk, skb);
1767 if (refcounted)
1768 sock_put(sk);
1769 goto discard_it;
1770
1771 do_time_wait:
1772 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1773 inet_twsk_put(inet_twsk(sk));
1774 goto discard_it;
1775 }
1776
1777 tcp_v6_fill_cb(skb, hdr, th);
1778
1779 if (tcp_checksum_complete(skb)) {
1780 inet_twsk_put(inet_twsk(sk));
1781 goto csum_error;
1782 }
1783
1784 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1785 case TCP_TW_SYN:
1786 {
1787 struct sock *sk2;
1788
1789 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1790 skb, __tcp_hdrlen(th),
1791 &ipv6_hdr(skb)->saddr, th->source,
1792 &ipv6_hdr(skb)->daddr,
1793 ntohs(th->dest),
1794 tcp_v6_iif_l3_slave(skb),
1795 sdif);
1796 if (sk2) {
1797 struct inet_timewait_sock *tw = inet_twsk(sk);
1798 inet_twsk_deschedule_put(tw);
1799 sk = sk2;
1800 tcp_v6_restore_cb(skb);
1801 refcounted = false;
1802 goto process;
1803 }
1804 }
1805 /* to ACK */
1806 fallthrough;
1807 case TCP_TW_ACK:
1808 tcp_v6_timewait_ack(sk, skb);
1809 break;
1810 case TCP_TW_RST:
1811 tcp_v6_send_reset(sk, skb);
1812 inet_twsk_deschedule_put(inet_twsk(sk));
1813 goto discard_it;
1814 case TCP_TW_SUCCESS:
1815 ;
1816 }
1817 goto discard_it;
1818 }
1819
1820 void tcp_v6_early_demux(struct sk_buff *skb)
1821 {
1822 const struct ipv6hdr *hdr;
1823 const struct tcphdr *th;
1824 struct sock *sk;
1825
1826 if (skb->pkt_type != PACKET_HOST)
1827 return;
1828
1829 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1830 return;
1831
1832 hdr = ipv6_hdr(skb);
1833 th = tcp_hdr(skb);
1834
1835 if (th->doff < sizeof(struct tcphdr) / 4)
1836 return;
1837
1838 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1839 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1840 &hdr->saddr, th->source,
1841 &hdr->daddr, ntohs(th->dest),
1842 inet6_iif(skb), inet6_sdif(skb));
1843 if (sk) {
1844 skb->sk = sk;
1845 skb->destructor = sock_edemux;
1846 if (sk_fullsock(sk)) {
1847 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1848
1849 if (dst)
1850 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1851 if (dst &&
1852 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1853 skb_dst_set_noref(skb, dst);
1854 }
1855 }
1856 }
1857
1858 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1859 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1860 .twsk_unique = tcp_twsk_unique,
1861 .twsk_destructor = tcp_twsk_destructor,
1862 };
1863
1864 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1865 {
1866 struct ipv6_pinfo *np = inet6_sk(sk);
1867
1868 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1869 }
1870
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit		= inet6_csk_xmit,
	.send_check		= tcp_v6_send_check,
	.rebuild_header		= inet6_sk_rebuild_header,
	.sk_rx_dst_set		= inet6_sk_rx_dst_set,
	.conn_request		= tcp_v6_conn_request,
	.syn_recv_sock		= tcp_v6_syn_recv_sock,
	.net_header_len		= sizeof(struct ipv6hdr),
	.net_frag_header_len	= sizeof(struct frag_hdr),
	.setsockopt		= ipv6_setsockopt,
	.getsockopt		= ipv6_getsockopt,
	.addr2sockaddr		= inet6_csk_addr2sockaddr,
	.sockaddr_len		= sizeof(struct sockaddr_in6),
	.mtu_reduced		= tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit		= ip_queue_xmit,
	.send_check		= tcp_v4_send_check,
	.rebuild_header		= inet_sk_rebuild_header,
	.sk_rx_dst_set		= inet_sk_rx_dst_set,
	.conn_request		= tcp_v6_conn_request,
	.syn_recv_sock		= tcp_v6_syn_recv_sock,
	.net_header_len		= sizeof(struct iphdr),
	.setsockopt		= ipv6_setsockopt,
	.getsockopt		= ipv6_getsockopt,
	.addr2sockaddr		= inet6_csk_addr2sockaddr,
	.sockaddr_len		= sizeof(struct sockaddr_in6),
	.mtu_reduced		= tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

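/* Dump one full (listening or established) TCPv6 socket as a line of
 * /proc/net/tcp6.
 */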
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

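/* Dump one TIME_WAIT socket as a line of /proc/net/tcp6. */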
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

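/* seq_file ->show(): print the header line for SEQ_START_TOKEN, otherwise
 * dispatch on socket state (TIME_WAIT, NEW_SYN_RECV or full socket).
 */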
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

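/* struct proto for IPv6 TCP sockets.  Most handlers are shared with the
 * IPv4 implementation; only the address-family specific pieces differ.
 */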
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

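/* Per-netns init: create the control socket used to send replies (such as
 * resets) that are not owned by any local socket.
 */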
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

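/* Register the IPv6 TCP protocol handler, the SOCK_STREAM protosw entry,
 * the per-netns operations and MPTCP; errors unwind in reverse order.
 */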
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}