• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 #include <trace/hooks/net.h>
71 
72 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 				      struct request_sock *req);
75 
76 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 #else
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 						   const struct in6_addr *addr,
86 						   int l3index)
87 {
88 	return NULL;
89 }
90 #endif
91 
/* Return a pointer to the inet6 part of a given TCP socket.
 * It can be used in the TCP stack instead of inet6_sk(sk): this avoids
 * a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) \
	(&container_of_const(tcp_sk(sk), struct tcp6_sock, tcp)->inet6)
99 
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)100 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
101 {
102 	struct dst_entry *dst = skb_dst(skb);
103 
104 	if (dst && dst_hold_safe(dst)) {
105 		const struct rt6_info *rt = (const struct rt6_info *)dst;
106 
107 		rcu_assign_pointer(sk->sk_rx_dst, dst);
108 		sk->sk_rx_dst_ifindex = skb->skb_iif;
109 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
110 	}
111 }
112 
tcp_v6_init_seq(const struct sk_buff * skb)113 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
114 {
115 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
116 				ipv6_hdr(skb)->saddr.s6_addr32,
117 				tcp_hdr(skb)->dest,
118 				tcp_hdr(skb)->source);
119 }
120 
tcp_v6_init_ts_off(const struct net * net,const struct sk_buff * skb)121 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
122 {
123 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
124 				   ipv6_hdr(skb)->saddr.s6_addr32);
125 }
126 
tcp_v6_pre_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)127 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
128 			      int addr_len)
129 {
130 	/* This check is replicated from tcp_v6_connect() and intended to
131 	 * prevent BPF program called below from accessing bytes that are out
132 	 * of the bound specified by user in addr_len.
133 	 */
134 	if (addr_len < SIN6_LEN_RFC2133)
135 		return -EINVAL;
136 
137 	sock_owned_by_me(sk);
138 
139 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
140 }
141 
tcp_v6_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)142 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
143 			  int addr_len)
144 {
145 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
146 	struct inet_connection_sock *icsk = inet_csk(sk);
147 	struct in6_addr *saddr = NULL, *final_p, final;
148 	struct inet_timewait_death_row *tcp_death_row;
149 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
150 	struct inet_sock *inet = inet_sk(sk);
151 	struct tcp_sock *tp = tcp_sk(sk);
152 	struct net *net = sock_net(sk);
153 	struct ipv6_txoptions *opt;
154 	struct dst_entry *dst;
155 	struct flowi6 fl6;
156 	int addr_type;
157 	int err;
158 
159 	if (addr_len < SIN6_LEN_RFC2133)
160 		return -EINVAL;
161 
162 	if (usin->sin6_family != AF_INET6)
163 		return -EAFNOSUPPORT;
164 
165 	memset(&fl6, 0, sizeof(fl6));
166 
167 	if (np->sndflow) {
168 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
169 		IP6_ECN_flow_init(fl6.flowlabel);
170 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
171 			struct ip6_flowlabel *flowlabel;
172 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
173 			if (IS_ERR(flowlabel))
174 				return -EINVAL;
175 			fl6_sock_release(flowlabel);
176 		}
177 	}
178 
179 	/*
180 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
181 	 */
182 
183 	if (ipv6_addr_any(&usin->sin6_addr)) {
184 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
185 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
186 					       &usin->sin6_addr);
187 		else
188 			usin->sin6_addr = in6addr_loopback;
189 	}
190 
191 	addr_type = ipv6_addr_type(&usin->sin6_addr);
192 
193 	if (addr_type & IPV6_ADDR_MULTICAST)
194 		return -ENETUNREACH;
195 
196 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
197 		if (addr_len >= sizeof(struct sockaddr_in6) &&
198 		    usin->sin6_scope_id) {
199 			/* If interface is set while binding, indices
200 			 * must coincide.
201 			 */
202 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
203 				return -EINVAL;
204 
205 			sk->sk_bound_dev_if = usin->sin6_scope_id;
206 		}
207 
208 		/* Connect to link-local address requires an interface */
209 		if (!sk->sk_bound_dev_if)
210 			return -EINVAL;
211 	}
212 
213 	if (tp->rx_opt.ts_recent_stamp &&
214 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
215 		tp->rx_opt.ts_recent = 0;
216 		tp->rx_opt.ts_recent_stamp = 0;
217 		WRITE_ONCE(tp->write_seq, 0);
218 	}
219 
220 	sk->sk_v6_daddr = usin->sin6_addr;
221 	np->flow_label = fl6.flowlabel;
222 
223 	/*
224 	 *	TCP over IPv4
225 	 */
226 
227 	if (addr_type & IPV6_ADDR_MAPPED) {
228 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
229 		struct sockaddr_in sin;
230 
231 		if (ipv6_only_sock(sk))
232 			return -ENETUNREACH;
233 
234 		sin.sin_family = AF_INET;
235 		sin.sin_port = usin->sin6_port;
236 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
237 
238 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
239 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
240 		if (sk_is_mptcp(sk))
241 			mptcpv6_handle_mapped(sk, true);
242 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
243 #ifdef CONFIG_TCP_MD5SIG
244 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
245 #endif
246 
247 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
248 
249 		if (err) {
250 			icsk->icsk_ext_hdr_len = exthdrlen;
251 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
252 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	trace_android_vh_tcp_v6_connect(sk, uaddr);
267 
268 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 		saddr = &sk->sk_v6_rcv_saddr;
270 
271 	fl6.flowi6_proto = IPPROTO_TCP;
272 	fl6.daddr = sk->sk_v6_daddr;
273 	fl6.saddr = saddr ? *saddr : np->saddr;
274 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
275 	fl6.flowi6_oif = sk->sk_bound_dev_if;
276 	fl6.flowi6_mark = sk->sk_mark;
277 	fl6.fl6_dport = usin->sin6_port;
278 	fl6.fl6_sport = inet->inet_sport;
279 	fl6.flowi6_uid = sk->sk_uid;
280 
281 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282 	final_p = fl6_update_dst(&fl6, opt, &final);
283 
284 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
285 
286 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
287 	if (IS_ERR(dst)) {
288 		err = PTR_ERR(dst);
289 		goto failure;
290 	}
291 
292 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
293 
294 	if (!saddr) {
295 		saddr = &fl6.saddr;
296 
297 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
298 		if (err)
299 			goto failure;
300 	}
301 
302 	/* set the source address */
303 	np->saddr = *saddr;
304 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
305 
306 	sk->sk_gso_type = SKB_GSO_TCPV6;
307 	ip6_dst_store(sk, dst, NULL, NULL);
308 
309 	icsk->icsk_ext_hdr_len = 0;
310 	if (opt)
311 		icsk->icsk_ext_hdr_len = opt->opt_flen +
312 					 opt->opt_nflen;
313 
314 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
315 
316 	inet->inet_dport = usin->sin6_port;
317 
318 	tcp_set_state(sk, TCP_SYN_SENT);
319 	err = inet6_hash_connect(tcp_death_row, sk);
320 	if (err)
321 		goto late_failure;
322 
323 	sk_set_txhash(sk);
324 
325 	if (likely(!tp->repair)) {
326 		if (!tp->write_seq)
327 			WRITE_ONCE(tp->write_seq,
328 				   secure_tcpv6_seq(np->saddr.s6_addr32,
329 						    sk->sk_v6_daddr.s6_addr32,
330 						    inet->inet_sport,
331 						    inet->inet_dport));
332 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
333 						   sk->sk_v6_daddr.s6_addr32);
334 	}
335 
336 	if (tcp_fastopen_defer_connect(sk, &err))
337 		return err;
338 	if (err)
339 		goto late_failure;
340 
341 	err = tcp_connect(sk);
342 	if (err)
343 		goto late_failure;
344 
345 	return 0;
346 
347 late_failure:
348 	tcp_set_state(sk, TCP_CLOSE);
349 	inet_bhash2_reset_saddr(sk);
350 failure:
351 	inet->inet_dport = 0;
352 	sk->sk_route_caps = 0;
353 	return err;
354 }
355 
tcp_v6_mtu_reduced(struct sock * sk)356 static void tcp_v6_mtu_reduced(struct sock *sk)
357 {
358 	struct dst_entry *dst;
359 	u32 mtu;
360 
361 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
362 		return;
363 
364 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
365 
366 	/* Drop requests trying to increase our current mss.
367 	 * Check done in __ip6_rt_update_pmtu() is too late.
368 	 */
369 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
370 		return;
371 
372 	dst = inet6_csk_update_pmtu(sk, mtu);
373 	if (!dst)
374 		return;
375 
376 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
377 		tcp_sync_mss(sk, dst_mtu(dst));
378 		tcp_simple_retransmit(sk);
379 	}
380 }
381 
/*
 * tcp_v6_err - handle an ICMPv6 error that refers to a TCP segment
 * @skb: buffer whose data starts with the IPv6 header of the offending
 *       packet; the TCP header is found at @offset
 * @opt: unused here
 * @type: ICMPv6 type
 * @code: ICMPv6 code
 * @offset: offset of the TCP header inside skb->data
 * @info: ICMPv6 info field in network byte order (read as the MTU for
 *        ICMPV6_PKT_TOOBIG)
 *
 * Looks up the established socket the quoted segment belongs to and,
 * depending on the socket state and @type, follows a redirect, records a
 * reduced MTU, aborts a connection attempt or reports the error on the
 * socket.
 *
 * Returns -ENOENT when no socket matches, 0 in every other case.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (const struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* The quoted sequence number must lie between snd_una and snd_nxt,
	 * except for listening sockets.
	 */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Handle the new MTU now if the socket is not owned by the
		 * user; otherwise set the deferred flag, taking a socket
		 * reference only if the flag was not already set.
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	/* Report a hard error only when the socket is not owned by the user
	 * and has recverr set; otherwise store it as a soft error.
	 */
	if (!sock_owned_by_user(sk) && np->recverr) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
521 
522 
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)523 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
524 			      struct flowi *fl,
525 			      struct request_sock *req,
526 			      struct tcp_fastopen_cookie *foc,
527 			      enum tcp_synack_type synack_type,
528 			      struct sk_buff *syn_skb)
529 {
530 	struct inet_request_sock *ireq = inet_rsk(req);
531 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
532 	struct ipv6_txoptions *opt;
533 	struct flowi6 *fl6 = &fl->u.ip6;
534 	struct sk_buff *skb;
535 	int err = -ENOMEM;
536 	u8 tclass;
537 
538 	/* First, grab a route. */
539 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
540 					       IPPROTO_TCP)) == NULL)
541 		goto done;
542 
543 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
544 
545 	if (skb) {
546 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
547 				    &ireq->ir_v6_rmt_addr);
548 
549 		fl6->daddr = ireq->ir_v6_rmt_addr;
550 		if (np->repflow && ireq->pktopts)
551 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
552 
553 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
554 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
555 				(np->tclass & INET_ECN_MASK) :
556 				np->tclass;
557 
558 		if (!INET_ECN_is_capable(tclass) &&
559 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
560 			tclass |= INET_ECN_ECT_0;
561 
562 		rcu_read_lock();
563 		opt = ireq->ipv6_opt;
564 		if (!opt)
565 			opt = rcu_dereference(np->opt);
566 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
567 			       opt, tclass, sk->sk_priority);
568 		rcu_read_unlock();
569 		err = net_xmit_eval(err);
570 	}
571 
572 done:
573 	return err;
574 }
575 
576 
tcp_v6_reqsk_destructor(struct request_sock * req)577 static void tcp_v6_reqsk_destructor(struct request_sock *req)
578 {
579 	kfree(inet_rsk(req)->ipv6_opt);
580 	consume_skb(inet_rsk(req)->pktopts);
581 }
582 
583 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)584 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
585 						   const struct in6_addr *addr,
586 						   int l3index)
587 {
588 	return tcp_md5_do_lookup(sk, l3index,
589 				 (union tcp_md5_addr *)addr, AF_INET6);
590 }
591 
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)592 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
593 						const struct sock *addr_sk)
594 {
595 	int l3index;
596 
597 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
598 						 addr_sk->sk_bound_dev_if);
599 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
600 				    l3index);
601 }
602 
/* Parse a struct tcp_md5sig passed via setsockopt and add or delete the
 * corresponding MD5 key on @sk.
 *
 * A zero tcpm_keylen deletes the key, any other length adds it. A
 * v4-mapped address is stored as an AF_INET key built from the last 32
 * bits of the address; any other address is stored as AF_INET6.
 *
 * Returns -EINVAL for a short @optlen, a non-AF_INET6 address family,
 * an out-of-range prefix length, an ifindex that is not an L3 master
 * device, or an over-long key; -EFAULT if the option cannot be copied;
 * otherwise the result of tcp_md5_do_del() or tcp_md5_do_add().
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_md5_addr *key_addr;
	int key_family;
	int l3index = 0;
	bool is_ext, mapped;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	is_ext = (optname == TCP_MD5SIG_EXT);
	mapped = ipv6_addr_v4mapped(&sin6->sin6_addr);
	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	/* An explicit prefix length is honoured only for TCP_MD5SIG_EXT
	 * with the PREFIX flag; otherwise the full address length is used.
	 */
	if (is_ext && (cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX)) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (mapped && prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = mapped ? 32 : 128;
	}

	if (is_ext && cmd.tcpm_ifindex &&
	    (cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX)) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* Testing dev for set/not set outside of rcu is fine;
		 * for now the device MUST be an L3 master.
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (mapped) {
		key_addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
		key_family = AF_INET;
	} else {
		key_addr = (union tcp_md5_addr *)&sin6->sin6_addr;
		key_family = AF_INET6;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, key_addr, key_family, prefixlen,
				      l3index, flags);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, key_addr, key_family, prefixlen, l3index,
			      flags, cmd.tcpm_key, cmd.tcpm_keylen);
}
671 
tcp_v6_md5_hash_headers(struct tcp_md5sig_pool * hp,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)672 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
673 				   const struct in6_addr *daddr,
674 				   const struct in6_addr *saddr,
675 				   const struct tcphdr *th, int nbytes)
676 {
677 	struct tcp6_pseudohdr *bp;
678 	struct scatterlist sg;
679 	struct tcphdr *_th;
680 
681 	bp = hp->scratch;
682 	/* 1. TCP pseudo-header (RFC2460) */
683 	bp->saddr = *saddr;
684 	bp->daddr = *daddr;
685 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
686 	bp->len = cpu_to_be32(nbytes);
687 
688 	_th = (struct tcphdr *)(bp + 1);
689 	memcpy(_th, th, sizeof(*th));
690 	_th->check = 0;
691 
692 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
693 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
694 				sizeof(*bp) + sizeof(*th));
695 	return crypto_ahash_update(hp->md5_req);
696 }
697 
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)698 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
699 			       const struct in6_addr *daddr, struct in6_addr *saddr,
700 			       const struct tcphdr *th)
701 {
702 	struct tcp_md5sig_pool *hp;
703 	struct ahash_request *req;
704 
705 	hp = tcp_get_md5sig_pool();
706 	if (!hp)
707 		goto clear_hash_noput;
708 	req = hp->md5_req;
709 
710 	if (crypto_ahash_init(req))
711 		goto clear_hash;
712 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
713 		goto clear_hash;
714 	if (tcp_md5_hash_key(hp, key))
715 		goto clear_hash;
716 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
717 	if (crypto_ahash_final(req))
718 		goto clear_hash;
719 
720 	tcp_put_md5sig_pool();
721 	return 0;
722 
723 clear_hash:
724 	tcp_put_md5sig_pool();
725 clear_hash_noput:
726 	memset(md5_hash, 0, 16);
727 	return 1;
728 }
729 
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)730 static int tcp_v6_md5_hash_skb(char *md5_hash,
731 			       const struct tcp_md5sig_key *key,
732 			       const struct sock *sk,
733 			       const struct sk_buff *skb)
734 {
735 	const struct in6_addr *saddr, *daddr;
736 	struct tcp_md5sig_pool *hp;
737 	struct ahash_request *req;
738 	const struct tcphdr *th = tcp_hdr(skb);
739 
740 	if (sk) { /* valid for establish/request sockets */
741 		saddr = &sk->sk_v6_rcv_saddr;
742 		daddr = &sk->sk_v6_daddr;
743 	} else {
744 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
745 		saddr = &ip6h->saddr;
746 		daddr = &ip6h->daddr;
747 	}
748 
749 	hp = tcp_get_md5sig_pool();
750 	if (!hp)
751 		goto clear_hash_noput;
752 	req = hp->md5_req;
753 
754 	if (crypto_ahash_init(req))
755 		goto clear_hash;
756 
757 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
758 		goto clear_hash;
759 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
760 		goto clear_hash;
761 	if (tcp_md5_hash_key(hp, key))
762 		goto clear_hash;
763 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
764 	if (crypto_ahash_final(req))
765 		goto clear_hash;
766 
767 	tcp_put_md5sig_pool();
768 	return 0;
769 
770 clear_hash:
771 	tcp_put_md5sig_pool();
772 clear_hash_noput:
773 	memset(md5_hash, 0, 16);
774 	return 1;
775 }
776 
777 #endif
778 
tcp_v6_init_req(struct request_sock * req,const struct sock * sk_listener,struct sk_buff * skb)779 static void tcp_v6_init_req(struct request_sock *req,
780 			    const struct sock *sk_listener,
781 			    struct sk_buff *skb)
782 {
783 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
784 	struct inet_request_sock *ireq = inet_rsk(req);
785 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
786 
787 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
788 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
789 
790 	/* So that link locals have meaning */
791 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
792 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
793 		ireq->ir_iif = tcp_v6_iif(skb);
794 
795 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
796 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
797 	     np->rxopt.bits.rxinfo ||
798 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
799 	     np->rxopt.bits.rxohlim || np->repflow)) {
800 		refcount_inc(&skb->users);
801 		ireq->pktopts = skb;
802 	}
803 }
804 
tcp_v6_route_req(const struct sock * sk,struct sk_buff * skb,struct flowi * fl,struct request_sock * req)805 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
806 					  struct sk_buff *skb,
807 					  struct flowi *fl,
808 					  struct request_sock *req)
809 {
810 	tcp_v6_init_req(req, sk, skb);
811 
812 	if (security_inet_conn_request(sk, skb, req))
813 		return NULL;
814 
815 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
816 }
817 
818 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
819 	.family		=	AF_INET6,
820 	.obj_size	=	sizeof(struct tcp6_request_sock),
821 	.rtx_syn_ack	=	tcp_rtx_synack,
822 	.send_ack	=	tcp_v6_reqsk_send_ack,
823 	.destructor	=	tcp_v6_reqsk_destructor,
824 	.send_reset	=	tcp_v6_send_reset,
825 	.syn_ack_timeout =	tcp_syn_ack_timeout,
826 };
827 
828 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
829 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
830 				sizeof(struct ipv6hdr),
831 #ifdef CONFIG_TCP_MD5SIG
832 	.req_md5_lookup	=	tcp_v6_md5_lookup,
833 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
834 #endif
835 #ifdef CONFIG_SYN_COOKIES
836 	.cookie_init_seq =	cookie_v6_init_sequence,
837 #endif
838 	.route_req	=	tcp_v6_route_req,
839 	.init_seq	=	tcp_v6_init_seq,
840 	.init_ts_off	=	tcp_v6_init_ts_off,
841 	.send_synack	=	tcp_v6_send_synack,
842 };
843 
/*
 * tcp_v6_send_response - send a header-only TCP reply (ACK or RST) to @skb
 * @sk: socket the reply is sent on behalf of, may be NULL
 * @skb: packet being answered; its addresses and ports are swapped
 * @seq: sequence number of the reply
 * @ack: acknowledgment number of the reply
 * @win: window value of the reply
 * @tsval: timestamp value, used only when @tsecr is non-zero
 * @tsecr: timestamp echo; non-zero adds a timestamp option
 * @oif: output interface index, 0 to derive it from @skb
 * @key: MD5 key, non-NULL adds an MD5 option (CONFIG_TCP_MD5SIG only)
 * @rst: non-zero to send a RST
 * @tclass: traffic class; the ECN bits are masked out before sending
 * @label: flow label for the reply
 * @priority: priority passed to ip6_xmit()
 * @txhash: if non-zero, stored as the L4 hash of the reply
 *
 * The reply is transmitted through the per-namespace control socket
 * net->ipv6.tcp_sk. Allocation or route lookup failures are silent: the
 * function just returns (freeing the buffer in the latter case).
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	/* Work out the header length including all options first. */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * incoming segment carried no ACK.
	 */
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	/* Options follow the fixed header. */
	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* Addresses are swapped: the incoming source is the
		 * reply's destination.
		 */
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif) {
		fl6.flowi6_oif = tcp_v6_iif(skb);
	} else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	/* The reply mark derived from the incoming packet wins; the socket
	 * mark is the fallback when it is zero.
	 */
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Always pass a socket to ip6_dst_lookup_flow(), even for a RST:
	 * the underlying function uses it to retrieve the network
	 * namespace. The control socket is used when there is no socket
	 * or it is in TIME_WAIT state.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
974 
tcp_v6_send_reset(const struct sock * sk,struct sk_buff * skb)975 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
976 {
977 	const struct tcphdr *th = tcp_hdr(skb);
978 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
979 	u32 seq = 0, ack_seq = 0;
980 	struct tcp_md5sig_key *key = NULL;
981 #ifdef CONFIG_TCP_MD5SIG
982 	const __u8 *hash_location = NULL;
983 	unsigned char newhash[16];
984 	int genhash;
985 	struct sock *sk1 = NULL;
986 #endif
987 	__be32 label = 0;
988 	u32 priority = 0;
989 	struct net *net;
990 	u32 txhash = 0;
991 	int oif = 0;
992 
993 	if (th->rst)
994 		return;
995 
996 	/* If sk not NULL, it means we did a successful lookup and incoming
997 	 * route had to be correct. prequeue might have dropped our dst.
998 	 */
999 	if (!sk && !ipv6_unicast_destination(skb))
1000 		return;
1001 
1002 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1003 #ifdef CONFIG_TCP_MD5SIG
1004 	rcu_read_lock();
1005 	hash_location = tcp_parse_md5sig_option(th);
1006 	if (sk && sk_fullsock(sk)) {
1007 		int l3index;
1008 
1009 		/* sdif set, means packet ingressed via a device
1010 		 * in an L3 domain and inet_iif is set to it.
1011 		 */
1012 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1013 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1014 	} else if (hash_location) {
1015 		int dif = tcp_v6_iif_l3_slave(skb);
1016 		int sdif = tcp_v6_sdif(skb);
1017 		int l3index;
1018 
1019 		/*
1020 		 * active side is lost. Try to find listening socket through
1021 		 * source port, and then find md5 key through listening socket.
1022 		 * we are not loose security here:
1023 		 * Incoming packet is checked with md5 hash with finding key,
1024 		 * no RST generated if md5 hash doesn't match.
1025 		 */
1026 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1027 					    NULL, 0, &ipv6h->saddr, th->source,
1028 					    &ipv6h->daddr, ntohs(th->source),
1029 					    dif, sdif);
1030 		if (!sk1)
1031 			goto out;
1032 
1033 		/* sdif set, means packet ingressed via a device
1034 		 * in an L3 domain and dif is set to it.
1035 		 */
1036 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1037 
1038 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1039 		if (!key)
1040 			goto out;
1041 
1042 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1043 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1044 			goto out;
1045 	}
1046 #endif
1047 
1048 	if (th->ack)
1049 		seq = ntohl(th->ack_seq);
1050 	else
1051 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1052 			  (th->doff << 2);
1053 
1054 	if (sk) {
1055 		oif = sk->sk_bound_dev_if;
1056 		if (sk_fullsock(sk)) {
1057 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1058 
1059 			trace_tcp_send_reset(sk, skb);
1060 			if (np->repflow)
1061 				label = ip6_flowlabel(ipv6h);
1062 			priority = sk->sk_priority;
1063 			txhash = sk->sk_txhash;
1064 		}
1065 		if (sk->sk_state == TCP_TIME_WAIT) {
1066 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1067 			priority = inet_twsk(sk)->tw_priority;
1068 			txhash = inet_twsk(sk)->tw_txhash;
1069 		}
1070 	} else {
1071 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1072 			label = ip6_flowlabel(ipv6h);
1073 	}
1074 
1075 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1076 			     ipv6_get_dsfield(ipv6h), label, priority, txhash);
1077 
1078 #ifdef CONFIG_TCP_MD5SIG
1079 out:
1080 	rcu_read_unlock();
1081 #endif
1082 }
1083 
/* Thin wrapper: forwards every argument to tcp_v6_send_response() with
 * the rst argument fixed to 0.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	const int rst = 0;

	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key,
			     rst, tclass, label, priority, txhash);
}
1092 
tcp_v6_timewait_ack(struct sock * sk,struct sk_buff * skb)1093 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1094 {
1095 	struct inet_timewait_sock *tw = inet_twsk(sk);
1096 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1097 
1098 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1099 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1100 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1101 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1102 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1103 			tw->tw_txhash);
1104 
1105 	inet_twsk_put(tw);
1106 }
1107 
tcp_v6_reqsk_send_ack(const struct sock * sk,struct sk_buff * skb,struct request_sock * req)1108 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1109 				  struct request_sock *req)
1110 {
1111 	int l3index;
1112 
1113 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1114 
1115 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1116 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1117 	 */
1118 	/* RFC 7323 2.3
1119 	 * The window field (SEG.WND) of every outgoing segment, with the
1120 	 * exception of <SYN> segments, MUST be right-shifted by
1121 	 * Rcv.Wind.Shift bits:
1122 	 */
1123 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1124 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1125 			tcp_rsk(req)->rcv_nxt,
1126 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1127 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1128 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1129 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1130 			ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1131 			READ_ONCE(sk->sk_priority),
1132 			READ_ONCE(tcp_rsk(req)->txhash));
1133 }
1134 
1135 
/* With CONFIG_SYN_COOKIES, hand every non-SYN segment to
 * cookie_v6_check() and return its result; in all other cases return
 * @sk unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1146 
tcp_v6_get_syncookie(struct sock * sk,struct ipv6hdr * iph,struct tcphdr * th,u32 * cookie)1147 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1148 			 struct tcphdr *th, u32 *cookie)
1149 {
1150 	u16 mss = 0;
1151 #ifdef CONFIG_SYN_COOKIES
1152 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1153 				    &tcp_request_sock_ipv6_ops, sk, th);
1154 	if (mss) {
1155 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1156 		tcp_synq_overflow(sk);
1157 	}
1158 #endif
1159 	return mss;
1160 }
1161 
tcp_v6_conn_request(struct sock * sk,struct sk_buff * skb)1162 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1163 {
1164 	if (skb->protocol == htons(ETH_P_IP))
1165 		return tcp_v4_conn_request(sk, skb);
1166 
1167 	if (!ipv6_unicast_destination(skb))
1168 		goto drop;
1169 
1170 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1171 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1172 		return 0;
1173 	}
1174 
1175 	return tcp_conn_request(&tcp6_request_sock_ops,
1176 				&tcp_request_sock_ipv6_ops, sk, skb);
1177 
1178 drop:
1179 	tcp_listendrop(sk);
1180 	return 0; /* don't send reset */
1181 }
1182 
tcp_v6_restore_cb(struct sk_buff * skb)1183 static void tcp_v6_restore_cb(struct sk_buff *skb)
1184 {
1185 	/* We need to move header back to the beginning if xfrm6_policy_check()
1186 	 * and tcp_v6_fill_cb() are going to be called again.
1187 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1188 	 */
1189 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1190 		sizeof(struct inet6_skb_parm));
1191 }
1192 
/* Create the child for an IPv4 packet received on an IPv6 listener
 * (v4-mapped case): let tcp_v4_syn_recv_sock() build the socket, then
 * give it IPv6 per-socket state copied from the listener and switch it
 * to the ipv6_mapped operations.
 */
static struct sock *tcp_v6_mapped_syn_recv_sock(const struct sock *sk,
						struct sk_buff *skb,
						struct request_sock *req,
						struct dst_entry *dst,
						struct request_sock *req_unhash,
						bool *own_req)
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_pinfo *child_np;
	struct sock *child;

	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
	if (!child)
		return NULL;

	child_np = tcp_inet6_sk(child);
	inet_sk(child)->pinet6 = child_np;

	memcpy(child_np, np, sizeof(struct ipv6_pinfo));

	child_np->saddr = child->sk_v6_rcv_saddr;

	inet_csk(child)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(child))
		mptcpv6_handle_mapped(child, true);
	child->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(child)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	/* Fields that must not be shared with the listener. */
	child_np->ipv6_mc_list = NULL;
	child_np->ipv6_ac_list = NULL;
	child_np->ipv6_fl_list = NULL;
	child_np->pktoptions = NULL;
	child_np->opt = NULL;
	child_np->mcast_oif = inet_iif(skb);
	child_np->mcast_hops = ip_hdr(skb)->ttl;
	child_np->rcv_flowinfo = 0;
	if (np->repflow)
		child_np->flow_label = 0;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug
	 * socks count here, tcp_create_openreq_child now does this for us,
	 * see the comment in that function for the gory details. -acme
	 */

	/* Tricky: until this moment the IPv4 code worked with the IPv6
	 * icsk.icsk_af_ops. Sync the MSS now.
	 */
	tcp_sync_mss(child, inet_csk(child)->icsk_pmtu_cookie);

	return child;
}

/* Build the child socket for request @req once the handshake completes.
 *
 * Returns the new socket, or NULL on failure; most failure paths count
 * a listen drop on @sk.  *@own_req is set from inet_ehash_nolisten().
 * IPv4 packets are handled by tcp_v6_mapped_syn_recv_sock().
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *child_np;
	struct inet_sock *child_inet;
	struct ipv6_txoptions *opt;
	bool found_dup_sk = false;
	struct tcp_sock *child_tp;
	struct sock *child;
	struct flowi6 fl6;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v6_mapped_syn_recv_sock(sk, skb, req, dst,
						   req_unhash, own_req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	child = tcp_create_openreq_child(sk, req, skb);
	if (!child)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	child->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(child, skb);

	child_np = tcp_inet6_sk(child);
	child_inet = inet_sk(child);
	child_tp = tcp_sk(child);
	child_inet->pinet6 = child_np;

	memcpy(child_np, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(child, dst, NULL, NULL);

	/* Addressing comes from the request, not from the listener. */
	child->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	child_np->saddr = ireq->ir_v6_loc_addr;
	child->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	child->sk_bound_dev_if = ireq->ir_iif;

	/* Options: first of all, no IPv4 options. */
	child_inet->inet_opt = NULL;
	child_np->ipv6_mc_list = NULL;
	child_np->ipv6_ac_list = NULL;
	child_np->ipv6_fl_list = NULL;

	/* Clone RX bits */
	child_np->rxopt.all = np->rxopt.all;

	child_np->pktoptions = NULL;
	child_np->opt = NULL;
	child_np->mcast_oif = tcp_v6_iif(skb);
	child_np->mcast_hops = ipv6_hdr(skb)->hop_limit;
	child_np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		child_np->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		child_np->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options: the request's own options if it has
	 * any, otherwise the listener's.  Keeping a reference count would
	 * be more clever, but duplicating also reattaches optmem to the
	 * child.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(child, opt);
		RCU_INIT_POINTER(child_np->opt, opt);
	}
	if (opt)
		inet_csk(child)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;
	else
		inet_csk(child)->icsk_ext_hdr_len = 0;

	tcp_ca_openreq_child(child, dst);

	tcp_sync_mss(child, dst_mtu(dst));
	child_tp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(child);

	child_inet->inet_daddr = child_inet->inet_saddr = LOOPBACK4_IPV6;
	child_inet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &child->sk_v6_daddr, l3index);
	if (key) {
		const union tcp_md5_addr *addr;

		addr = (union tcp_md5_addr *)&child->sk_v6_daddr;
		if (tcp_md5_key_copy(child, addr, AF_INET6, 128, l3index, key))
			goto out_forced_close;
	}
#endif

	if (__inet_inherit_port(sk, child) < 0)
		goto out_forced_close;

	*own_req = inet_ehash_nolisten(child, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(child_tp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			child_np->pktoptions = skb_clone_and_charge_r(ireq->pktopts, child);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (child_np->pktoptions)
				tcp_v6_restore_cb(child_np->pktoptions);
		}
	} else if (!req_unhash && found_dup_sk) {
		/* This code path should only be executed in the
		 * syncookie case only
		 */
		bh_unlock_sock(child);
		sock_put(child);
		child = NULL;
	}

	return child;

out_forced_close:
	inet_csk_prepare_forced_close(child);
	tcp_done(child);
	goto out;
out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
1416 
1417 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1418 							   u32));
1419 /* The socket must have it's spinlock held when we get
1420  * here, unless it is a TCP_LISTEN socket.
1421  *
1422  * We have a potential double-lock case here, so even when
1423  * doing backlog processing we use the BH locking scheme.
1424  * This is because we cannot sleep with the original spinlock
1425  * held.
1426  */
1427 INDIRECT_CALLABLE_SCOPE
tcp_v6_do_rcv(struct sock * sk,struct sk_buff * skb)1428 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1429 {
1430 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1431 	struct sk_buff *opt_skb = NULL;
1432 	enum skb_drop_reason reason;
1433 	struct tcp_sock *tp;
1434 
1435 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1436 	   goes to IPv4 receive handler and backlogged.
1437 	   From backlog it always goes here. Kerboom...
1438 	   Fortunately, tcp_rcv_established and rcv_established
1439 	   handle them correctly, but it is not case with
1440 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1441 	 */
1442 
1443 	if (skb->protocol == htons(ETH_P_IP))
1444 		return tcp_v4_do_rcv(sk, skb);
1445 
1446 	/*
1447 	 *	socket locking is here for SMP purposes as backlog rcv
1448 	 *	is currently called with bh processing disabled.
1449 	 */
1450 
1451 	/* Do Stevens' IPV6_PKTOPTIONS.
1452 
1453 	   Yes, guys, it is the only place in our code, where we
1454 	   may make it not affecting IPv4.
1455 	   The rest of code is protocol independent,
1456 	   and I do not like idea to uglify IPv4.
1457 
1458 	   Actually, all the idea behind IPV6_PKTOPTIONS
1459 	   looks not very well thought. For now we latch
1460 	   options, received in the last packet, enqueued
1461 	   by tcp. Feel free to propose better solution.
1462 					       --ANK (980728)
1463 	 */
1464 	if (np->rxopt.all)
1465 		opt_skb = skb_clone_and_charge_r(skb, sk);
1466 
1467 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1468 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1469 		struct dst_entry *dst;
1470 
1471 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1472 						lockdep_sock_is_held(sk));
1473 
1474 		sock_rps_save_rxhash(sk, skb);
1475 		sk_mark_napi_id(sk, skb);
1476 		if (dst) {
1477 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1478 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1479 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1480 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1481 				dst_release(dst);
1482 			}
1483 		}
1484 
1485 		tcp_rcv_established(sk, skb);
1486 		if (opt_skb)
1487 			goto ipv6_pktoptions;
1488 		return 0;
1489 	}
1490 
1491 	if (tcp_checksum_complete(skb))
1492 		goto csum_err;
1493 
1494 	if (sk->sk_state == TCP_LISTEN) {
1495 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1496 
1497 		if (!nsk)
1498 			goto discard;
1499 
1500 		if (nsk != sk) {
1501 			if (tcp_child_process(sk, nsk, skb))
1502 				goto reset;
1503 			if (opt_skb)
1504 				__kfree_skb(opt_skb);
1505 			return 0;
1506 		}
1507 	} else
1508 		sock_rps_save_rxhash(sk, skb);
1509 
1510 	if (tcp_rcv_state_process(sk, skb))
1511 		goto reset;
1512 	if (opt_skb)
1513 		goto ipv6_pktoptions;
1514 	return 0;
1515 
1516 reset:
1517 	tcp_v6_send_reset(sk, skb);
1518 discard:
1519 	if (opt_skb)
1520 		__kfree_skb(opt_skb);
1521 	kfree_skb_reason(skb, reason);
1522 	return 0;
1523 csum_err:
1524 	reason = SKB_DROP_REASON_TCP_CSUM;
1525 	trace_tcp_bad_csum(skb);
1526 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1527 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1528 	goto discard;
1529 
1530 
1531 ipv6_pktoptions:
1532 	/* Do you ask, what is it?
1533 
1534 	   1. skb was enqueued by tcp.
1535 	   2. skb is added to tail of read queue, rather than out of order.
1536 	   3. socket is not in passive state.
1537 	   4. Finally, it really contains options, which user wants to receive.
1538 	 */
1539 	tp = tcp_sk(sk);
1540 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1541 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1542 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1543 			np->mcast_oif = tcp_v6_iif(opt_skb);
1544 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1545 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1546 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1547 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1548 		if (np->repflow)
1549 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1550 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1551 			tcp_v6_restore_cb(opt_skb);
1552 			opt_skb = xchg(&np->pktoptions, opt_skb);
1553 		} else {
1554 			__kfree_skb(opt_skb);
1555 			opt_skb = xchg(&np->pktoptions, NULL);
1556 		}
1557 	}
1558 
1559 	consume_skb(opt_skb);
1560 	return 0;
1561 }
1562 
tcp_v6_fill_cb(struct sk_buff * skb,const struct ipv6hdr * hdr,const struct tcphdr * th)1563 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1564 			   const struct tcphdr *th)
1565 {
1566 	/* This is tricky: we move IP6CB at its correct location into
1567 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1568 	 * _decode_session6() uses IP6CB().
1569 	 * barrier() makes sure compiler won't play aliasing games.
1570 	 */
1571 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1572 		sizeof(struct inet6_skb_parm));
1573 	barrier();
1574 
1575 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1576 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1577 				    skb->len - th->doff*4);
1578 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1579 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1580 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1581 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1582 	TCP_SKB_CB(skb)->sacked = 0;
1583 	TCP_SKB_CB(skb)->has_rxtstamp =
1584 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1585 }
1586 
tcp_v6_rcv(struct sk_buff * skb)1587 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1588 {
1589 	enum skb_drop_reason drop_reason;
1590 	int sdif = inet6_sdif(skb);
1591 	int dif = inet6_iif(skb);
1592 	const struct tcphdr *th;
1593 	const struct ipv6hdr *hdr;
1594 	bool refcounted;
1595 	struct sock *sk;
1596 	int ret;
1597 	struct net *net = dev_net(skb->dev);
1598 
1599 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1600 	if (skb->pkt_type != PACKET_HOST)
1601 		goto discard_it;
1602 
1603 	/*
1604 	 *	Count it even if it's bad.
1605 	 */
1606 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1607 
1608 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1609 		goto discard_it;
1610 
1611 	th = (const struct tcphdr *)skb->data;
1612 
1613 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1614 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1615 		goto bad_packet;
1616 	}
1617 	if (!pskb_may_pull(skb, th->doff*4))
1618 		goto discard_it;
1619 
1620 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1621 		goto csum_error;
1622 
1623 	th = (const struct tcphdr *)skb->data;
1624 	hdr = ipv6_hdr(skb);
1625 
1626 lookup:
1627 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1628 				th->source, th->dest, inet6_iif(skb), sdif,
1629 				&refcounted);
1630 	if (!sk)
1631 		goto no_tcp_socket;
1632 
1633 process:
1634 	if (sk->sk_state == TCP_TIME_WAIT)
1635 		goto do_time_wait;
1636 
1637 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1638 		struct request_sock *req = inet_reqsk(sk);
1639 		bool req_stolen = false;
1640 		struct sock *nsk;
1641 
1642 		sk = req->rsk_listener;
1643 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1644 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1645 		else
1646 			drop_reason = tcp_inbound_md5_hash(sk, skb,
1647 							   &hdr->saddr, &hdr->daddr,
1648 							   AF_INET6, dif, sdif);
1649 		if (drop_reason) {
1650 			sk_drops_add(sk, skb);
1651 			reqsk_put(req);
1652 			goto discard_it;
1653 		}
1654 		if (tcp_checksum_complete(skb)) {
1655 			reqsk_put(req);
1656 			goto csum_error;
1657 		}
1658 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1659 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1660 			if (!nsk) {
1661 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1662 				goto lookup;
1663 			}
1664 			sk = nsk;
1665 			/* reuseport_migrate_sock() has already held one sk_refcnt
1666 			 * before returning.
1667 			 */
1668 		} else {
1669 			sock_hold(sk);
1670 		}
1671 		refcounted = true;
1672 		nsk = NULL;
1673 		if (!tcp_filter(sk, skb)) {
1674 			th = (const struct tcphdr *)skb->data;
1675 			hdr = ipv6_hdr(skb);
1676 			tcp_v6_fill_cb(skb, hdr, th);
1677 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1678 		} else {
1679 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1680 		}
1681 		if (!nsk) {
1682 			reqsk_put(req);
1683 			if (req_stolen) {
1684 				/* Another cpu got exclusive access to req
1685 				 * and created a full blown socket.
1686 				 * Try to feed this packet to this socket
1687 				 * instead of discarding it.
1688 				 */
1689 				tcp_v6_restore_cb(skb);
1690 				sock_put(sk);
1691 				goto lookup;
1692 			}
1693 			goto discard_and_relse;
1694 		}
1695 		nf_reset_ct(skb);
1696 		if (nsk == sk) {
1697 			reqsk_put(req);
1698 			tcp_v6_restore_cb(skb);
1699 		} else if (tcp_child_process(sk, nsk, skb)) {
1700 			tcp_v6_send_reset(nsk, skb);
1701 			goto discard_and_relse;
1702 		} else {
1703 			sock_put(sk);
1704 			return 0;
1705 		}
1706 	}
1707 
1708 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1709 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1710 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1711 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1712 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1713 			goto discard_and_relse;
1714 		}
1715 	}
1716 
1717 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1718 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1719 		goto discard_and_relse;
1720 	}
1721 
1722 	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1723 					   AF_INET6, dif, sdif);
1724 	if (drop_reason)
1725 		goto discard_and_relse;
1726 
1727 	nf_reset_ct(skb);
1728 
1729 	if (tcp_filter(sk, skb)) {
1730 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1731 		goto discard_and_relse;
1732 	}
1733 	th = (const struct tcphdr *)skb->data;
1734 	hdr = ipv6_hdr(skb);
1735 	tcp_v6_fill_cb(skb, hdr, th);
1736 
1737 	skb->dev = NULL;
1738 
1739 	if (sk->sk_state == TCP_LISTEN) {
1740 		ret = tcp_v6_do_rcv(sk, skb);
1741 		goto put_and_return;
1742 	}
1743 
1744 	sk_incoming_cpu_update(sk);
1745 
1746 	bh_lock_sock_nested(sk);
1747 	tcp_segs_in(tcp_sk(sk), skb);
1748 	ret = 0;
1749 	if (!sock_owned_by_user(sk)) {
1750 		ret = tcp_v6_do_rcv(sk, skb);
1751 	} else {
1752 		if (tcp_add_backlog(sk, skb, &drop_reason))
1753 			goto discard_and_relse;
1754 	}
1755 	bh_unlock_sock(sk);
1756 put_and_return:
1757 	if (refcounted)
1758 		sock_put(sk);
1759 	return ret ? -1 : 0;
1760 
1761 no_tcp_socket:
1762 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1763 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1764 		goto discard_it;
1765 
1766 	tcp_v6_fill_cb(skb, hdr, th);
1767 
1768 	if (tcp_checksum_complete(skb)) {
1769 csum_error:
1770 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1771 		trace_tcp_bad_csum(skb);
1772 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1773 bad_packet:
1774 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1775 	} else {
1776 		tcp_v6_send_reset(NULL, skb);
1777 	}
1778 
1779 discard_it:
1780 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1781 	kfree_skb_reason(skb, drop_reason);
1782 	return 0;
1783 
1784 discard_and_relse:
1785 	sk_drops_add(sk, skb);
1786 	if (refcounted)
1787 		sock_put(sk);
1788 	goto discard_it;
1789 
1790 do_time_wait:
1791 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1792 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1793 		inet_twsk_put(inet_twsk(sk));
1794 		goto discard_it;
1795 	}
1796 
1797 	tcp_v6_fill_cb(skb, hdr, th);
1798 
1799 	if (tcp_checksum_complete(skb)) {
1800 		inet_twsk_put(inet_twsk(sk));
1801 		goto csum_error;
1802 	}
1803 
1804 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1805 	case TCP_TW_SYN:
1806 	{
1807 		struct sock *sk2;
1808 
1809 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1810 					    skb, __tcp_hdrlen(th),
1811 					    &ipv6_hdr(skb)->saddr, th->source,
1812 					    &ipv6_hdr(skb)->daddr,
1813 					    ntohs(th->dest),
1814 					    tcp_v6_iif_l3_slave(skb),
1815 					    sdif);
1816 		if (sk2) {
1817 			struct inet_timewait_sock *tw = inet_twsk(sk);
1818 			inet_twsk_deschedule_put(tw);
1819 			sk = sk2;
1820 			tcp_v6_restore_cb(skb);
1821 			refcounted = false;
1822 			goto process;
1823 		}
1824 	}
1825 		/* to ACK */
1826 		fallthrough;
1827 	case TCP_TW_ACK:
1828 		tcp_v6_timewait_ack(sk, skb);
1829 		break;
1830 	case TCP_TW_RST:
1831 		tcp_v6_send_reset(sk, skb);
1832 		inet_twsk_deschedule_put(inet_twsk(sk));
1833 		goto discard_it;
1834 	case TCP_TW_SUCCESS:
1835 		;
1836 	}
1837 	goto discard_it;
1838 }
1839 
tcp_v6_early_demux(struct sk_buff * skb)1840 void tcp_v6_early_demux(struct sk_buff *skb)
1841 {
1842 	struct net *net = dev_net(skb->dev);
1843 	const struct ipv6hdr *hdr;
1844 	const struct tcphdr *th;
1845 	struct sock *sk;
1846 
1847 	if (skb->pkt_type != PACKET_HOST)
1848 		return;
1849 
1850 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1851 		return;
1852 
1853 	hdr = ipv6_hdr(skb);
1854 	th = tcp_hdr(skb);
1855 
1856 	if (th->doff < sizeof(struct tcphdr) / 4)
1857 		return;
1858 
1859 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1860 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1861 					&hdr->saddr, th->source,
1862 					&hdr->daddr, ntohs(th->dest),
1863 					inet6_iif(skb), inet6_sdif(skb));
1864 	if (sk) {
1865 		skb->sk = sk;
1866 		skb->destructor = sock_edemux;
1867 		if (sk_fullsock(sk)) {
1868 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1869 
1870 			if (dst)
1871 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1872 			if (dst &&
1873 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1874 				skb_dst_set_noref(skb, dst);
1875 		}
1876 	}
1877 }
1878 
1879 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1880 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1881 	.twsk_unique	= tcp_twsk_unique,
1882 	.twsk_destructor = tcp_twsk_destructor,
1883 };
1884 
tcp_v6_send_check(struct sock * sk,struct sk_buff * skb)1885 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1886 {
1887 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1888 }
1889 
1890 const struct inet_connection_sock_af_ops ipv6_specific = {
1891 	.queue_xmit	   = inet6_csk_xmit,
1892 	.send_check	   = tcp_v6_send_check,
1893 	.rebuild_header	   = inet6_sk_rebuild_header,
1894 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1895 	.conn_request	   = tcp_v6_conn_request,
1896 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1897 	.net_header_len	   = sizeof(struct ipv6hdr),
1898 	.net_frag_header_len = sizeof(struct frag_hdr),
1899 	.setsockopt	   = ipv6_setsockopt,
1900 	.getsockopt	   = ipv6_getsockopt,
1901 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1902 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1903 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1904 };
1905 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for native IPv6 TCP sockets; installed by
 * tcp_v6_init_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
1913 
1914 /*
1915  *	TCP over IPv4 via INET6 API
1916  */
1917 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1918 	.queue_xmit	   = ip_queue_xmit,
1919 	.send_check	   = tcp_v4_send_check,
1920 	.rebuild_header	   = inet_sk_rebuild_header,
1921 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1922 	.conn_request	   = tcp_v6_conn_request,
1923 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1924 	.net_header_len	   = sizeof(struct iphdr),
1925 	.setsockopt	   = ipv6_setsockopt,
1926 	.getsockopt	   = ipv6_getsockopt,
1927 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1928 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1929 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1930 };
1931 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for v4-mapped sockets: key lookup and
 * hashing use the IPv4 helpers, key parsing the IPv6 one.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1939 
1940 /* NOTE: A lot of things set to zero explicitly by call to
1941  *       sk_alloc() so need not be done here.
1942  */
tcp_v6_init_sock(struct sock * sk)1943 static int tcp_v6_init_sock(struct sock *sk)
1944 {
1945 	struct inet_connection_sock *icsk = inet_csk(sk);
1946 
1947 	tcp_init_sock(sk);
1948 
1949 	icsk->icsk_af_ops = &ipv6_specific;
1950 
1951 #ifdef CONFIG_TCP_MD5SIG
1952 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1953 #endif
1954 
1955 	return 0;
1956 }
1957 
1958 #ifdef CONFIG_PROC_FS
1959 /* Proc filesystem TCPv6 sock list dumping. */
get_openreq6(struct seq_file * seq,const struct request_sock * req,int i)1960 static void get_openreq6(struct seq_file *seq,
1961 			 const struct request_sock *req, int i)
1962 {
1963 	long ttd = req->rsk_timer.expires - jiffies;
1964 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1965 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1966 
1967 	if (ttd < 0)
1968 		ttd = 0;
1969 
1970 	seq_printf(seq,
1971 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1972 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1973 		   i,
1974 		   src->s6_addr32[0], src->s6_addr32[1],
1975 		   src->s6_addr32[2], src->s6_addr32[3],
1976 		   inet_rsk(req)->ir_num,
1977 		   dest->s6_addr32[0], dest->s6_addr32[1],
1978 		   dest->s6_addr32[2], dest->s6_addr32[3],
1979 		   ntohs(inet_rsk(req)->ir_rmt_port),
1980 		   TCP_SYN_RECV,
1981 		   0, 0, /* could print option size, but that is af dependent. */
1982 		   1,   /* timers active (only the expire timer) */
1983 		   jiffies_to_clock_t(ttd),
1984 		   req->num_timeout,
1985 		   from_kuid_munged(seq_user_ns(seq),
1986 				    sock_i_uid(req->rsk_listener)),
1987 		   0,  /* non standard timer */
1988 		   0, /* open_requests have no inode */
1989 		   0, req);
1990 }
1991 
get_tcp6_sock(struct seq_file * seq,struct sock * sp,int i)1992 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1993 {
1994 	const struct in6_addr *dest, *src;
1995 	__u16 destp, srcp;
1996 	int timer_active;
1997 	unsigned long timer_expires;
1998 	const struct inet_sock *inet = inet_sk(sp);
1999 	const struct tcp_sock *tp = tcp_sk(sp);
2000 	const struct inet_connection_sock *icsk = inet_csk(sp);
2001 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2002 	int rx_queue;
2003 	int state;
2004 
2005 	dest  = &sp->sk_v6_daddr;
2006 	src   = &sp->sk_v6_rcv_saddr;
2007 	destp = ntohs(inet->inet_dport);
2008 	srcp  = ntohs(inet->inet_sport);
2009 
2010 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2011 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2012 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2013 		timer_active	= 1;
2014 		timer_expires	= icsk->icsk_timeout;
2015 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2016 		timer_active	= 4;
2017 		timer_expires	= icsk->icsk_timeout;
2018 	} else if (timer_pending(&sp->sk_timer)) {
2019 		timer_active	= 2;
2020 		timer_expires	= sp->sk_timer.expires;
2021 	} else {
2022 		timer_active	= 0;
2023 		timer_expires = jiffies;
2024 	}
2025 
2026 	state = inet_sk_state_load(sp);
2027 	if (state == TCP_LISTEN)
2028 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2029 	else
2030 		/* Because we don't lock the socket,
2031 		 * we might find a transient negative value.
2032 		 */
2033 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2034 				      READ_ONCE(tp->copied_seq), 0);
2035 
2036 	seq_printf(seq,
2037 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2038 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2039 		   i,
2040 		   src->s6_addr32[0], src->s6_addr32[1],
2041 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2042 		   dest->s6_addr32[0], dest->s6_addr32[1],
2043 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2044 		   state,
2045 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2046 		   rx_queue,
2047 		   timer_active,
2048 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2049 		   icsk->icsk_retransmits,
2050 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2051 		   icsk->icsk_probes_out,
2052 		   sock_i_ino(sp),
2053 		   refcount_read(&sp->sk_refcnt), sp,
2054 		   jiffies_to_clock_t(icsk->icsk_rto),
2055 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2056 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2057 		   tcp_snd_cwnd(tp),
2058 		   state == TCP_LISTEN ?
2059 			fastopenq->max_qlen :
2060 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2061 		   );
2062 }
2063 
get_timewait6_sock(struct seq_file * seq,struct inet_timewait_sock * tw,int i)2064 static void get_timewait6_sock(struct seq_file *seq,
2065 			       struct inet_timewait_sock *tw, int i)
2066 {
2067 	long delta = tw->tw_timer.expires - jiffies;
2068 	const struct in6_addr *dest, *src;
2069 	__u16 destp, srcp;
2070 
2071 	dest = &tw->tw_v6_daddr;
2072 	src  = &tw->tw_v6_rcv_saddr;
2073 	destp = ntohs(tw->tw_dport);
2074 	srcp  = ntohs(tw->tw_sport);
2075 
2076 	seq_printf(seq,
2077 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2078 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2079 		   i,
2080 		   src->s6_addr32[0], src->s6_addr32[1],
2081 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2082 		   dest->s6_addr32[0], dest->s6_addr32[1],
2083 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2084 		   tw->tw_substate, 0, 0,
2085 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2086 		   refcount_read(&tw->tw_refcnt), tw);
2087 }
2088 
tcp6_seq_show(struct seq_file * seq,void * v)2089 static int tcp6_seq_show(struct seq_file *seq, void *v)
2090 {
2091 	struct tcp_iter_state *st;
2092 	struct sock *sk = v;
2093 
2094 	if (v == SEQ_START_TOKEN) {
2095 		seq_puts(seq,
2096 			 "  sl  "
2097 			 "local_address                         "
2098 			 "remote_address                        "
2099 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2100 			 "   uid  timeout inode\n");
2101 		goto out;
2102 	}
2103 	st = seq->private;
2104 
2105 	if (sk->sk_state == TCP_TIME_WAIT)
2106 		get_timewait6_sock(seq, v, st->num);
2107 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2108 		get_openreq6(seq, v, st->num);
2109 	else
2110 		get_tcp6_sock(seq, v, st->num);
2111 out:
2112 	return 0;
2113 }
2114 
2115 static const struct seq_operations tcp6_seq_ops = {
2116 	.show		= tcp6_seq_show,
2117 	.start		= tcp_seq_start,
2118 	.next		= tcp_seq_next,
2119 	.stop		= tcp_seq_stop,
2120 };
2121 
2122 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2123 	.family		= AF_INET6,
2124 };
2125 
tcp6_proc_init(struct net * net)2126 int __net_init tcp6_proc_init(struct net *net)
2127 {
2128 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2129 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2130 		return -ENOMEM;
2131 	return 0;
2132 }
2133 
tcp6_proc_exit(struct net * net)2134 void tcp6_proc_exit(struct net *net)
2135 {
2136 	remove_proc_entry("tcp6", net->proc_net);
2137 }
2138 #endif
2139 
2140 struct proto tcpv6_prot = {
2141 	.name			= "TCPv6",
2142 	.owner			= THIS_MODULE,
2143 	.close			= tcp_close,
2144 	.pre_connect		= tcp_v6_pre_connect,
2145 	.connect		= tcp_v6_connect,
2146 	.disconnect		= tcp_disconnect,
2147 	.accept			= inet_csk_accept,
2148 	.ioctl			= tcp_ioctl,
2149 	.init			= tcp_v6_init_sock,
2150 	.destroy		= tcp_v4_destroy_sock,
2151 	.shutdown		= tcp_shutdown,
2152 	.setsockopt		= tcp_setsockopt,
2153 	.getsockopt		= tcp_getsockopt,
2154 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2155 	.keepalive		= tcp_set_keepalive,
2156 	.recvmsg		= tcp_recvmsg,
2157 	.sendmsg		= tcp_sendmsg,
2158 	.splice_eof		= tcp_splice_eof,
2159 	.backlog_rcv		= tcp_v6_do_rcv,
2160 	.release_cb		= tcp_release_cb,
2161 	.hash			= inet6_hash,
2162 	.unhash			= inet_unhash,
2163 	.get_port		= inet_csk_get_port,
2164 	.put_port		= inet_put_port,
2165 #ifdef CONFIG_BPF_SYSCALL
2166 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2167 #endif
2168 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2169 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2170 	.stream_memory_free	= tcp_stream_memory_free,
2171 	.sockets_allocated	= &tcp_sockets_allocated,
2172 
2173 	.memory_allocated	= &tcp_memory_allocated,
2174 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2175 
2176 	.memory_pressure	= &tcp_memory_pressure,
2177 	.orphan_count		= &tcp_orphan_count,
2178 	.sysctl_mem		= sysctl_tcp_mem,
2179 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2180 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2181 	.max_header		= MAX_TCP_HEADER,
2182 	.obj_size		= sizeof(struct tcp6_sock),
2183 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2184 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2185 	.twsk_prot		= &tcp6_timewait_sock_ops,
2186 	.rsk_prot		= &tcp6_request_sock_ops,
2187 	.h.hashinfo		= NULL,
2188 	.no_autobind		= true,
2189 	.diag_destroy		= tcp_abort,
2190 };
2191 EXPORT_SYMBOL_GPL(tcpv6_prot);
2192 
2193 static const struct inet6_protocol tcpv6_protocol = {
2194 	.handler	=	tcp_v6_rcv,
2195 	.err_handler	=	tcp_v6_err,
2196 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2197 };
2198 
2199 static struct inet_protosw tcpv6_protosw = {
2200 	.type		=	SOCK_STREAM,
2201 	.protocol	=	IPPROTO_TCP,
2202 	.prot		=	&tcpv6_prot,
2203 	.ops		=	&inet6_stream_ops,
2204 	.flags		=	INET_PROTOSW_PERMANENT |
2205 				INET_PROTOSW_ICSK,
2206 };
2207 
tcpv6_net_init(struct net * net)2208 static int __net_init tcpv6_net_init(struct net *net)
2209 {
2210 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2211 				    SOCK_RAW, IPPROTO_TCP, net);
2212 }
2213 
tcpv6_net_exit(struct net * net)2214 static void __net_exit tcpv6_net_exit(struct net *net)
2215 {
2216 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2217 }
2218 
2219 static struct pernet_operations tcpv6_net_ops = {
2220 	.init	    = tcpv6_net_init,
2221 	.exit	    = tcpv6_net_exit,
2222 };
2223 
tcpv6_init(void)2224 int __init tcpv6_init(void)
2225 {
2226 	int ret;
2227 
2228 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2229 	if (ret)
2230 		goto out;
2231 
2232 	/* register inet6 protocol */
2233 	ret = inet6_register_protosw(&tcpv6_protosw);
2234 	if (ret)
2235 		goto out_tcpv6_protocol;
2236 
2237 	ret = register_pernet_subsys(&tcpv6_net_ops);
2238 	if (ret)
2239 		goto out_tcpv6_protosw;
2240 
2241 	ret = mptcpv6_init();
2242 	if (ret)
2243 		goto out_tcpv6_pernet_subsys;
2244 
2245 out:
2246 	return ret;
2247 
2248 out_tcpv6_pernet_subsys:
2249 	unregister_pernet_subsys(&tcpv6_net_ops);
2250 out_tcpv6_protosw:
2251 	inet6_unregister_protosw(&tcpv6_protosw);
2252 out_tcpv6_protocol:
2253 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2254 	goto out;
2255 }
2256 
tcpv6_exit(void)2257 void tcpv6_exit(void)
2258 {
2259 	unregister_pernet_subsys(&tcpv6_net_ops);
2260 	inet6_unregister_protosw(&tcpv6_protosw);
2261 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2262 }
2263