1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		sk->sk_rx_dst_ifindex = skb->skb_iif;
112 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent the BPF program called below from accessing bytes that are out
135 	 * of the bounds specified by the user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
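/* Active open (connect()): validate the destination address, hand
 * v4-mapped destinations off to tcp_v4_connect(), otherwise resolve the
 * IPv6 route, choose a source address, pick the initial sequence number
 * and send the SYN.
 */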
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_connection_sock *icsk = inet_csk(sk);
150 	struct in6_addr *saddr = NULL, *final_p, final;
151 	struct inet_timewait_death_row *tcp_death_row;
152 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153 	struct inet_sock *inet = inet_sk(sk);
154 	struct tcp_sock *tp = tcp_sk(sk);
155 	struct net *net = sock_net(sk);
156 	struct ipv6_txoptions *opt;
157 	struct dst_entry *dst;
158 	struct flowi6 fl6;
159 	int addr_type;
160 	int err;
161 
162 	if (addr_len < SIN6_LEN_RFC2133)
163 		return -EINVAL;
164 
165 	if (usin->sin6_family != AF_INET6)
166 		return -EAFNOSUPPORT;
167 
168 	memset(&fl6, 0, sizeof(fl6));
169 
170 	if (np->sndflow) {
171 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
172 		IP6_ECN_flow_init(fl6.flowlabel);
173 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
174 			struct ip6_flowlabel *flowlabel;
175 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176 			if (IS_ERR(flowlabel))
177 				return -EINVAL;
178 			fl6_sock_release(flowlabel);
179 		}
180 	}
181 
182 	/*
183 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
184 	 */
185 
186 	if (ipv6_addr_any(&usin->sin6_addr)) {
187 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
188 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189 					       &usin->sin6_addr);
190 		else
191 			usin->sin6_addr = in6addr_loopback;
192 	}
193 
194 	addr_type = ipv6_addr_type(&usin->sin6_addr);
195 
196 	if (addr_type & IPV6_ADDR_MULTICAST)
197 		return -ENETUNREACH;
198 
199 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
200 		if (addr_len >= sizeof(struct sockaddr_in6) &&
201 		    usin->sin6_scope_id) {
202 			/* If interface is set while binding, indices
203 			 * must coincide.
204 			 */
205 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206 				return -EINVAL;
207 
208 			sk->sk_bound_dev_if = usin->sin6_scope_id;
209 		}
210 
211 		/* Connect to link-local address requires an interface */
212 		if (!sk->sk_bound_dev_if)
213 			return -EINVAL;
214 	}
215 
216 	if (tp->rx_opt.ts_recent_stamp &&
217 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218 		tp->rx_opt.ts_recent = 0;
219 		tp->rx_opt.ts_recent_stamp = 0;
220 		WRITE_ONCE(tp->write_seq, 0);
221 	}
222 
223 	sk->sk_v6_daddr = usin->sin6_addr;
224 	np->flow_label = fl6.flowlabel;
225 
226 	/*
227 	 *	TCP over IPv4
228 	 */
229 
230 	if (addr_type & IPV6_ADDR_MAPPED) {
231 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
232 		struct sockaddr_in sin;
233 
234 		if (ipv6_only_sock(sk))
235 			return -ENETUNREACH;
236 
237 		sin.sin_family = AF_INET;
238 		sin.sin_port = usin->sin6_port;
239 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240 
241 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
242 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
243 		if (sk_is_mptcp(sk))
244 			mptcpv6_handle_mapped(sk, true);
245 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
246 #ifdef CONFIG_TCP_MD5SIG
247 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 #endif
249 
250 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251 
252 		if (err) {
253 			icsk->icsk_ext_hdr_len = exthdrlen;
254 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
255 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
256 			if (sk_is_mptcp(sk))
257 				mptcpv6_handle_mapped(sk, false);
258 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
259 #ifdef CONFIG_TCP_MD5SIG
260 			tp->af_specific = &tcp_sock_ipv6_specific;
261 #endif
262 			goto failure;
263 		}
264 		np->saddr = sk->sk_v6_rcv_saddr;
265 
266 		return err;
267 	}
268 
269 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
270 		saddr = &sk->sk_v6_rcv_saddr;
271 
272 	fl6.flowi6_proto = IPPROTO_TCP;
273 	fl6.daddr = sk->sk_v6_daddr;
274 	fl6.saddr = saddr ? *saddr : np->saddr;
275 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
276 	fl6.flowi6_oif = sk->sk_bound_dev_if;
277 	fl6.flowi6_mark = sk->sk_mark;
278 	fl6.fl6_dport = usin->sin6_port;
279 	fl6.fl6_sport = inet->inet_sport;
280 	fl6.flowi6_uid = sk->sk_uid;
281 
282 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
283 	final_p = fl6_update_dst(&fl6, opt, &final);
284 
285 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
286 
287 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
288 	if (IS_ERR(dst)) {
289 		err = PTR_ERR(dst);
290 		goto failure;
291 	}
292 
293 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
294 
295 	if (!saddr) {
296 		saddr = &fl6.saddr;
297 
298 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
299 		if (err)
300 			goto failure;
301 	}
302 
303 	/* set the source address */
304 	np->saddr = *saddr;
305 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
306 
307 	sk->sk_gso_type = SKB_GSO_TCPV6;
308 	ip6_dst_store(sk, dst, NULL, NULL);
309 
310 	icsk->icsk_ext_hdr_len = 0;
311 	if (opt)
312 		icsk->icsk_ext_hdr_len = opt->opt_flen +
313 					 opt->opt_nflen;
314 
315 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
316 
317 	inet->inet_dport = usin->sin6_port;
318 
319 	tcp_set_state(sk, TCP_SYN_SENT);
320 	err = inet6_hash_connect(tcp_death_row, sk);
321 	if (err)
322 		goto late_failure;
323 
324 	sk_set_txhash(sk);
325 
326 	if (likely(!tp->repair)) {
327 		if (!tp->write_seq)
328 			WRITE_ONCE(tp->write_seq,
329 				   secure_tcpv6_seq(np->saddr.s6_addr32,
330 						    sk->sk_v6_daddr.s6_addr32,
331 						    inet->inet_sport,
332 						    inet->inet_dport));
333 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
334 						   sk->sk_v6_daddr.s6_addr32);
335 	}
336 
337 	if (tcp_fastopen_defer_connect(sk, &err))
338 		return err;
339 	if (err)
340 		goto late_failure;
341 
342 	err = tcp_connect(sk);
343 	if (err)
344 		goto late_failure;
345 
346 	return 0;
347 
348 late_failure:
349 	tcp_set_state(sk, TCP_CLOSE);
350 	inet_bhash2_reset_saddr(sk);
351 failure:
352 	inet->inet_dport = 0;
353 	sk->sk_route_caps = 0;
354 	return err;
355 }
356 
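/* Deferred handling of an ICMPV6_PKT_TOOBIG notification: ignore values
 * that would raise our current MSS, update the cached path MTU and, if it
 * shrank below the old one, resync the MSS and retransmit what no longer
 * fits.
 */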
357 static void tcp_v6_mtu_reduced(struct sock *sk)
358 {
359 	struct dst_entry *dst;
360 	u32 mtu;
361 
362 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
363 		return;
364 
365 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
366 
367 	/* Drop requests trying to increase our current mss.
368 	 * The check done in __ip6_rt_update_pmtu() is too late.
369 	 */
370 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
371 		return;
372 
373 	dst = inet6_csk_update_pmtu(sk, mtu);
374 	if (!dst)
375 		return;
376 
377 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
378 		tcp_sync_mss(sk, dst_mtu(dst));
379 		tcp_simple_retransmit(sk);
380 	}
381 }
382 
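/* ICMPv6 error handler for TCP: look up the socket the error refers to and
 * act on it. NDISC_REDIRECT updates the route, ICMPV6_PKT_TOOBIG triggers
 * a PMTU/MSS reduction, and other errors are reported to the socket (or,
 * for ICMPV6_DEST_UNREACH with ICMPV6_NOROUTE, may revert the RTO backoff
 * per RFC 6069).
 */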
383 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
384 		u8 type, u8 code, int offset, __be32 info)
385 {
386 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
387 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
388 	struct net *net = dev_net(skb->dev);
389 	struct request_sock *fastopen;
390 	struct ipv6_pinfo *np;
391 	struct tcp_sock *tp;
392 	__u32 seq, snd_una;
393 	struct sock *sk;
394 	bool fatal;
395 	int err;
396 
397 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
398 					&hdr->daddr, th->dest,
399 					&hdr->saddr, ntohs(th->source),
400 					skb->dev->ifindex, inet6_sdif(skb));
401 
402 	if (!sk) {
403 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
404 				  ICMP6_MIB_INERRORS);
405 		return -ENOENT;
406 	}
407 
408 	if (sk->sk_state == TCP_TIME_WAIT) {
409 		inet_twsk_put(inet_twsk(sk));
410 		return 0;
411 	}
412 	seq = ntohl(th->seq);
413 	fatal = icmpv6_err_convert(type, code, &err);
414 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
415 		tcp_req_err(sk, seq, fatal);
416 		return 0;
417 	}
418 
419 	bh_lock_sock(sk);
420 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
421 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
422 
423 	if (sk->sk_state == TCP_CLOSE)
424 		goto out;
425 
426 	if (static_branch_unlikely(&ip6_min_hopcount)) {
427 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
428 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
429 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
430 			goto out;
431 		}
432 	}
433 
434 	tp = tcp_sk(sk);
435 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
436 	fastopen = rcu_dereference(tp->fastopen_rsk);
437 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
438 	if (sk->sk_state != TCP_LISTEN &&
439 	    !between(seq, snd_una, tp->snd_nxt)) {
440 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
441 		goto out;
442 	}
443 
444 	np = tcp_inet6_sk(sk);
445 
446 	if (type == NDISC_REDIRECT) {
447 		if (!sock_owned_by_user(sk)) {
448 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
449 
450 			if (dst)
451 				dst->ops->redirect(dst, sk, skb);
452 		}
453 		goto out;
454 	}
455 
456 	if (type == ICMPV6_PKT_TOOBIG) {
457 		u32 mtu = ntohl(info);
458 
459 		/* We are not interested in TCP_LISTEN and open_requests
460 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
461 		 * they should go through unfragmented).
462 		 */
463 		if (sk->sk_state == TCP_LISTEN)
464 			goto out;
465 
466 		if (!ip6_sk_accept_pmtu(sk))
467 			goto out;
468 
469 		if (mtu < IPV6_MIN_MTU)
470 			goto out;
471 
472 		WRITE_ONCE(tp->mtu_info, mtu);
473 
474 		if (!sock_owned_by_user(sk))
475 			tcp_v6_mtu_reduced(sk);
476 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
477 					   &sk->sk_tsq_flags))
478 			sock_hold(sk);
479 		goto out;
480 	}
481 
482 
483 	/* Might be for a request_sock */
484 	switch (sk->sk_state) {
485 	case TCP_SYN_SENT:
486 	case TCP_SYN_RECV:
487 		/* Only in fast or simultaneous open. If a fast open socket is
488 		 * already accepted it is treated as a connected one below.
489 		 */
490 		if (fastopen && !fastopen->sk)
491 			break;
492 
493 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
494 
495 		if (!sock_owned_by_user(sk)) {
496 			sk->sk_err = err;
497 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
498 
499 			tcp_done(sk);
500 		} else
501 			sk->sk_err_soft = err;
502 		goto out;
503 	case TCP_LISTEN:
504 		break;
505 	default:
506 		/* Check if this ICMP message allows reverting the backoff.
507 		 * (see RFC 6069)
508 		 */
509 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
510 		    code == ICMPV6_NOROUTE)
511 			tcp_ld_RTO_revert(sk, seq);
512 	}
513 
514 	if (!sock_owned_by_user(sk) && np->recverr) {
515 		sk->sk_err = err;
516 		sk_error_report(sk);
517 	} else
518 		sk->sk_err_soft = err;
519 
520 out:
521 	bh_unlock_sock(sk);
522 	sock_put(sk);
523 	return 0;
524 }
525 
526 
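/* Send a SYN-ACK for @req: route the reply if no dst was supplied, build
 * the segment with tcp_make_synack(), fill in the checksum, traffic class
 * and flow label, then transmit it with ip6_xmit().
 */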
527 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
528 			      struct flowi *fl,
529 			      struct request_sock *req,
530 			      struct tcp_fastopen_cookie *foc,
531 			      enum tcp_synack_type synack_type,
532 			      struct sk_buff *syn_skb)
533 {
534 	struct inet_request_sock *ireq = inet_rsk(req);
535 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
536 	struct ipv6_txoptions *opt;
537 	struct flowi6 *fl6 = &fl->u.ip6;
538 	struct sk_buff *skb;
539 	int err = -ENOMEM;
540 	u8 tclass;
541 
542 	/* First, grab a route. */
543 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
544 					       IPPROTO_TCP)) == NULL)
545 		goto done;
546 
547 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
548 
549 	if (skb) {
550 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
551 				    &ireq->ir_v6_rmt_addr);
552 
553 		fl6->daddr = ireq->ir_v6_rmt_addr;
554 		if (np->repflow && ireq->pktopts)
555 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
556 
557 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
558 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
559 				(np->tclass & INET_ECN_MASK) :
560 				np->tclass;
561 
562 		if (!INET_ECN_is_capable(tclass) &&
563 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
564 			tclass |= INET_ECN_ECT_0;
565 
566 		rcu_read_lock();
567 		opt = ireq->ipv6_opt;
568 		if (!opt)
569 			opt = rcu_dereference(np->opt);
570 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
571 			       opt, tclass, sk->sk_priority);
572 		rcu_read_unlock();
573 		err = net_xmit_eval(err);
574 	}
575 
576 done:
577 	return err;
578 }
579 
580 
581 static void tcp_v6_reqsk_destructor(struct request_sock *req)
582 {
583 	kfree(inet_rsk(req)->ipv6_opt);
584 	consume_skb(inet_rsk(req)->pktopts);
585 }
586 
587 #ifdef CONFIG_TCP_MD5SIG
588 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
589 						   const struct in6_addr *addr,
590 						   int l3index)
591 {
592 	return tcp_md5_do_lookup(sk, l3index,
593 				 (union tcp_md5_addr *)addr, AF_INET6);
594 }
595 
596 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
597 						const struct sock *addr_sk)
598 {
599 	int l3index;
600 
601 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
602 						 addr_sk->sk_bound_dev_if);
603 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
604 				    l3index);
605 }
606 
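/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the request,
 * work out the optional prefix length and L3 master ifindex, then add or
 * delete the key, using the IPv4 form for v4-mapped peer addresses.
 */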
607 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
608 				 sockptr_t optval, int optlen)
609 {
610 	struct tcp_md5sig cmd;
611 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
612 	int l3index = 0;
613 	u8 prefixlen;
614 	u8 flags;
615 
616 	if (optlen < sizeof(cmd))
617 		return -EINVAL;
618 
619 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
620 		return -EFAULT;
621 
622 	if (sin6->sin6_family != AF_INET6)
623 		return -EINVAL;
624 
625 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
626 
627 	if (optname == TCP_MD5SIG_EXT &&
628 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
629 		prefixlen = cmd.tcpm_prefixlen;
630 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
631 					prefixlen > 32))
632 			return -EINVAL;
633 	} else {
634 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
635 	}
636 
637 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
638 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
639 		struct net_device *dev;
640 
641 		rcu_read_lock();
642 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
643 		if (dev && netif_is_l3_master(dev))
644 			l3index = dev->ifindex;
645 		rcu_read_unlock();
646 
647 		/* ok to reference set/not set outside of rcu;
648 		 * right now device MUST be an L3 master
649 		 */
650 		if (!dev || !l3index)
651 			return -EINVAL;
652 	}
653 
654 	if (!cmd.tcpm_keylen) {
655 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
656 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
657 					      AF_INET, prefixlen,
658 					      l3index, flags);
659 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
660 				      AF_INET6, prefixlen, l3index, flags);
661 	}
662 
663 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
664 		return -EINVAL;
665 
666 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
667 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
668 				      AF_INET, prefixlen, l3index, flags,
669 				      cmd.tcpm_key, cmd.tcpm_keylen,
670 				      GFP_KERNEL);
671 
672 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
673 			      AF_INET6, prefixlen, l3index, flags,
674 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
675 }
676 
677 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
678 				   const struct in6_addr *daddr,
679 				   const struct in6_addr *saddr,
680 				   const struct tcphdr *th, int nbytes)
681 {
682 	struct tcp6_pseudohdr *bp;
683 	struct scatterlist sg;
684 	struct tcphdr *_th;
685 
686 	bp = hp->scratch;
687 	/* 1. TCP pseudo-header (RFC2460) */
688 	bp->saddr = *saddr;
689 	bp->daddr = *daddr;
690 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
691 	bp->len = cpu_to_be32(nbytes);
692 
693 	_th = (struct tcphdr *)(bp + 1);
694 	memcpy(_th, th, sizeof(*th));
695 	_th->check = 0;
696 
697 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
698 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
699 				sizeof(*bp) + sizeof(*th));
700 	return crypto_ahash_update(hp->md5_req);
701 }
702 
703 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
704 			       const struct in6_addr *daddr, struct in6_addr *saddr,
705 			       const struct tcphdr *th)
706 {
707 	struct tcp_md5sig_pool *hp;
708 	struct ahash_request *req;
709 
710 	hp = tcp_get_md5sig_pool();
711 	if (!hp)
712 		goto clear_hash_noput;
713 	req = hp->md5_req;
714 
715 	if (crypto_ahash_init(req))
716 		goto clear_hash;
717 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
718 		goto clear_hash;
719 	if (tcp_md5_hash_key(hp, key))
720 		goto clear_hash;
721 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
722 	if (crypto_ahash_final(req))
723 		goto clear_hash;
724 
725 	tcp_put_md5sig_pool();
726 	return 0;
727 
728 clear_hash:
729 	tcp_put_md5sig_pool();
730 clear_hash_noput:
731 	memset(md5_hash, 0, 16);
732 	return 1;
733 }
734 
735 static int tcp_v6_md5_hash_skb(char *md5_hash,
736 			       const struct tcp_md5sig_key *key,
737 			       const struct sock *sk,
738 			       const struct sk_buff *skb)
739 {
740 	const struct in6_addr *saddr, *daddr;
741 	struct tcp_md5sig_pool *hp;
742 	struct ahash_request *req;
743 	const struct tcphdr *th = tcp_hdr(skb);
744 
745 	if (sk) { /* valid for establish/request sockets */
746 		saddr = &sk->sk_v6_rcv_saddr;
747 		daddr = &sk->sk_v6_daddr;
748 	} else {
749 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
750 		saddr = &ip6h->saddr;
751 		daddr = &ip6h->daddr;
752 	}
753 
754 	hp = tcp_get_md5sig_pool();
755 	if (!hp)
756 		goto clear_hash_noput;
757 	req = hp->md5_req;
758 
759 	if (crypto_ahash_init(req))
760 		goto clear_hash;
761 
762 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
763 		goto clear_hash;
764 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
765 		goto clear_hash;
766 	if (tcp_md5_hash_key(hp, key))
767 		goto clear_hash;
768 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
769 	if (crypto_ahash_final(req))
770 		goto clear_hash;
771 
772 	tcp_put_md5sig_pool();
773 	return 0;
774 
775 clear_hash:
776 	tcp_put_md5sig_pool();
777 clear_hash_noput:
778 	memset(md5_hash, 0, 16);
779 	return 1;
780 }
781 
782 #endif
783 
784 static void tcp_v6_init_req(struct request_sock *req,
785 			    const struct sock *sk_listener,
786 			    struct sk_buff *skb)
787 {
788 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
789 	struct inet_request_sock *ireq = inet_rsk(req);
790 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
791 
792 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
793 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
794 
795 	/* So that link locals have meaning */
796 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
797 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
798 		ireq->ir_iif = tcp_v6_iif(skb);
799 
800 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
801 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
802 	     np->rxopt.bits.rxinfo ||
803 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
804 	     np->rxopt.bits.rxohlim || np->repflow)) {
805 		refcount_inc(&skb->users);
806 		ireq->pktopts = skb;
807 	}
808 }
809 
810 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
811 					  struct sk_buff *skb,
812 					  struct flowi *fl,
813 					  struct request_sock *req)
814 {
815 	tcp_v6_init_req(req, sk, skb);
816 
817 	if (security_inet_conn_request(sk, skb, req))
818 		return NULL;
819 
820 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
821 }
822 
823 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
824 	.family		=	AF_INET6,
825 	.obj_size	=	sizeof(struct tcp6_request_sock),
826 	.rtx_syn_ack	=	tcp_rtx_synack,
827 	.send_ack	=	tcp_v6_reqsk_send_ack,
828 	.destructor	=	tcp_v6_reqsk_destructor,
829 	.send_reset	=	tcp_v6_send_reset,
830 	.syn_ack_timeout =	tcp_syn_ack_timeout,
831 };
832 
833 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
834 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
835 				sizeof(struct ipv6hdr),
836 #ifdef CONFIG_TCP_MD5SIG
837 	.req_md5_lookup	=	tcp_v6_md5_lookup,
838 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
839 #endif
840 #ifdef CONFIG_SYN_COOKIES
841 	.cookie_init_seq =	cookie_v6_init_sequence,
842 #endif
843 	.route_req	=	tcp_v6_route_req,
844 	.init_seq	=	tcp_v6_init_seq,
845 	.init_ts_off	=	tcp_v6_init_ts_off,
846 	.send_synack	=	tcp_v6_send_synack,
847 };
848 
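/* Build and transmit a bare control segment (ACK, or RST when @rst is set)
 * in reply to @skb: swap addresses and ports, optionally add timestamp,
 * MD5 signature and MPTCP reset options, then route and send it from the
 * per-netns control socket.
 */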
849 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
850 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
851 				 int oif, struct tcp_md5sig_key *key, int rst,
852 				 u8 tclass, __be32 label, u32 priority, u32 txhash)
853 {
854 	const struct tcphdr *th = tcp_hdr(skb);
855 	struct tcphdr *t1;
856 	struct sk_buff *buff;
857 	struct flowi6 fl6;
858 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
859 	struct sock *ctl_sk = net->ipv6.tcp_sk;
860 	unsigned int tot_len = sizeof(struct tcphdr);
861 	__be32 mrst = 0, *topt;
862 	struct dst_entry *dst;
863 	__u32 mark = 0;
864 
865 	if (tsecr)
866 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
867 #ifdef CONFIG_TCP_MD5SIG
868 	if (key)
869 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
870 #endif
871 
872 #ifdef CONFIG_MPTCP
873 	if (rst && !key) {
874 		mrst = mptcp_reset_option(skb);
875 
876 		if (mrst)
877 			tot_len += sizeof(__be32);
878 	}
879 #endif
880 
881 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
882 	if (!buff)
883 		return;
884 
885 	skb_reserve(buff, MAX_TCP_HEADER);
886 
887 	t1 = skb_push(buff, tot_len);
888 	skb_reset_transport_header(buff);
889 
890 	/* Swap the send and the receive. */
891 	memset(t1, 0, sizeof(*t1));
892 	t1->dest = th->source;
893 	t1->source = th->dest;
894 	t1->doff = tot_len / 4;
895 	t1->seq = htonl(seq);
896 	t1->ack_seq = htonl(ack);
897 	t1->ack = !rst || !th->ack;
898 	t1->rst = rst;
899 	t1->window = htons(win);
900 
901 	topt = (__be32 *)(t1 + 1);
902 
903 	if (tsecr) {
904 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
905 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
906 		*topt++ = htonl(tsval);
907 		*topt++ = htonl(tsecr);
908 	}
909 
910 	if (mrst)
911 		*topt++ = mrst;
912 
913 #ifdef CONFIG_TCP_MD5SIG
914 	if (key) {
915 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
916 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
917 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
918 				    &ipv6_hdr(skb)->saddr,
919 				    &ipv6_hdr(skb)->daddr, t1);
920 	}
921 #endif
922 
923 	memset(&fl6, 0, sizeof(fl6));
924 	fl6.daddr = ipv6_hdr(skb)->saddr;
925 	fl6.saddr = ipv6_hdr(skb)->daddr;
926 	fl6.flowlabel = label;
927 
928 	buff->ip_summed = CHECKSUM_PARTIAL;
929 
930 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
931 
932 	fl6.flowi6_proto = IPPROTO_TCP;
933 	if (rt6_need_strict(&fl6.daddr) && !oif)
934 		fl6.flowi6_oif = tcp_v6_iif(skb);
935 	else {
936 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
937 			oif = skb->skb_iif;
938 
939 		fl6.flowi6_oif = oif;
940 	}
941 
942 	if (sk) {
943 		if (sk->sk_state == TCP_TIME_WAIT)
944 			mark = inet_twsk(sk)->tw_mark;
945 		else
946 			mark = READ_ONCE(sk->sk_mark);
947 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
948 	}
949 	if (txhash) {
950 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
951 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
952 	}
953 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
954 	fl6.fl6_dport = t1->dest;
955 	fl6.fl6_sport = t1->source;
956 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
957 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
958 
959 	/* Pass a socket to ip6_dst_lookup even when it is only for a RST;
960 	 * the underlying function will use it to retrieve the network
961 	 * namespace.
962 	 */
963 	if (sk && sk->sk_state != TCP_TIME_WAIT)
964 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
965 	else
966 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
967 	if (!IS_ERR(dst)) {
968 		skb_dst_set(buff, dst);
969 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
970 			 tclass & ~INET_ECN_MASK, priority);
971 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
972 		if (rst)
973 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
974 		return;
975 	}
976 
977 	kfree_skb(buff);
978 }
979 
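/* Send a RST in reply to @skb. Nothing is sent if the offending packet
 * itself carries RST or is not addressed to a unicast destination; when
 * MD5 is in use the reply is signed, and for packets that do not match a
 * full socket the incoming signature is verified first so that no RST is
 * generated on a mismatch.
 */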
980 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
981 {
982 	const struct tcphdr *th = tcp_hdr(skb);
983 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
984 	u32 seq = 0, ack_seq = 0;
985 	struct tcp_md5sig_key *key = NULL;
986 #ifdef CONFIG_TCP_MD5SIG
987 	const __u8 *hash_location = NULL;
988 	unsigned char newhash[16];
989 	int genhash;
990 	struct sock *sk1 = NULL;
991 #endif
992 	__be32 label = 0;
993 	u32 priority = 0;
994 	struct net *net;
995 	u32 txhash = 0;
996 	int oif = 0;
997 
998 	if (th->rst)
999 		return;
1000 
1001 	/* If sk is not NULL, it means we did a successful lookup and the incoming
1002 	 * route had to be correct. The prequeue might have dropped our dst.
1003 	 */
1004 	if (!sk && !ipv6_unicast_destination(skb))
1005 		return;
1006 
1007 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1008 #ifdef CONFIG_TCP_MD5SIG
1009 	rcu_read_lock();
1010 	hash_location = tcp_parse_md5sig_option(th);
1011 	if (sk && sk_fullsock(sk)) {
1012 		int l3index;
1013 
1014 		/* sdif set, means packet ingressed via a device
1015 		 * in an L3 domain and inet_iif is set to it.
1016 		 */
1017 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1018 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1019 	} else if (hash_location) {
1020 		int dif = tcp_v6_iif_l3_slave(skb);
1021 		int sdif = tcp_v6_sdif(skb);
1022 		int l3index;
1023 
1024 		/*
1025 		 * The active side is lost. Try to find the listening socket through
1026 		 * the source port, and then find the md5 key through that socket.
1027 		 * We do not lose security here:
1028 		 * the incoming packet is checked against the md5 hash of the key we find,
1029 		 * and no RST is generated if the hash doesn't match.
1030 		 */
1031 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1032 					    NULL, 0, &ipv6h->saddr, th->source,
1033 					    &ipv6h->daddr, ntohs(th->source),
1034 					    dif, sdif);
1035 		if (!sk1)
1036 			goto out;
1037 
1038 		/* sdif set, means packet ingressed via a device
1039 		 * in an L3 domain and dif is set to it.
1040 		 */
1041 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1042 
1043 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1044 		if (!key)
1045 			goto out;
1046 
1047 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1048 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1049 			goto out;
1050 	}
1051 #endif
1052 
1053 	if (th->ack)
1054 		seq = ntohl(th->ack_seq);
1055 	else
1056 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1057 			  (th->doff << 2);
1058 
1059 	if (sk) {
1060 		oif = sk->sk_bound_dev_if;
1061 		if (sk_fullsock(sk)) {
1062 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1063 
1064 			trace_tcp_send_reset(sk, skb);
1065 			if (np->repflow)
1066 				label = ip6_flowlabel(ipv6h);
1067 			priority = sk->sk_priority;
1068 			txhash = sk->sk_txhash;
1069 		}
1070 		if (sk->sk_state == TCP_TIME_WAIT) {
1071 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1072 			priority = inet_twsk(sk)->tw_priority;
1073 			txhash = inet_twsk(sk)->tw_txhash;
1074 		}
1075 	} else {
1076 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1077 			label = ip6_flowlabel(ipv6h);
1078 	}
1079 
1080 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1081 			     ipv6_get_dsfield(ipv6h), label, priority, txhash);
1082 
1083 #ifdef CONFIG_TCP_MD5SIG
1084 out:
1085 	rcu_read_unlock();
1086 #endif
1087 }
1088 
1089 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1090 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1091 			    struct tcp_md5sig_key *key, u8 tclass,
1092 			    __be32 label, u32 priority, u32 txhash)
1093 {
1094 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1095 			     tclass, label, priority, txhash);
1096 }
1097 
1098 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1099 {
1100 	struct inet_timewait_sock *tw = inet_twsk(sk);
1101 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1102 
1103 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1104 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1105 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1106 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1107 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1108 			tw->tw_txhash);
1109 
1110 	inet_twsk_put(tw);
1111 }
1112 
1113 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1114 				  struct request_sock *req)
1115 {
1116 	int l3index;
1117 
1118 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1119 
1120 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1121 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1122 	 */
1123 	/* RFC 7323 2.3
1124 	 * The window field (SEG.WND) of every outgoing segment, with the
1125 	 * exception of <SYN> segments, MUST be right-shifted by
1126 	 * Rcv.Wind.Shift bits:
1127 	 */
1128 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1129 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1130 			tcp_rsk(req)->rcv_nxt,
1131 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1132 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1133 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1134 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1135 			ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1136 			READ_ONCE(sk->sk_priority),
1137 			READ_ONCE(tcp_rsk(req)->txhash));
1138 }
1139 
1140 
1141 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1142 {
1143 #ifdef CONFIG_SYN_COOKIES
1144 	const struct tcphdr *th = tcp_hdr(skb);
1145 
1146 	if (!th->syn)
1147 		sk = cookie_v6_check(sk, skb);
1148 #endif
1149 	return sk;
1150 }
1151 
1152 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1153 			 struct tcphdr *th, u32 *cookie)
1154 {
1155 	u16 mss = 0;
1156 #ifdef CONFIG_SYN_COOKIES
1157 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1158 				    &tcp_request_sock_ipv6_ops, sk, th);
1159 	if (mss) {
1160 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1161 		tcp_synq_overflow(sk);
1162 	}
1163 #endif
1164 	return mss;
1165 }
1166 
1167 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1168 {
1169 	if (skb->protocol == htons(ETH_P_IP))
1170 		return tcp_v4_conn_request(sk, skb);
1171 
1172 	if (!ipv6_unicast_destination(skb))
1173 		goto drop;
1174 
1175 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1176 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1177 		return 0;
1178 	}
1179 
1180 	return tcp_conn_request(&tcp6_request_sock_ops,
1181 				&tcp_request_sock_ipv6_ops, sk, skb);
1182 
1183 drop:
1184 	tcp_listendrop(sk);
1185 	return 0; /* don't send reset */
1186 }
1187 
1188 static void tcp_v6_restore_cb(struct sk_buff *skb)
1189 {
1190 	/* We need to move header back to the beginning if xfrm6_policy_check()
1191 	 * and tcp_v6_fill_cb() are going to be called again.
1192 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1193 	 */
1194 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1195 		sizeof(struct inet6_skb_parm));
1196 }
1197 
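/* Create the child socket once the handshake completes: v4-mapped peers
 * are delegated to tcp_v4_syn_recv_sock(), otherwise an IPv6 child is set
 * up with addresses, options, MD5 key and routing state taken from the
 * request and the listening socket.
 */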
1198 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1199 					 struct request_sock *req,
1200 					 struct dst_entry *dst,
1201 					 struct request_sock *req_unhash,
1202 					 bool *own_req)
1203 {
1204 	struct inet_request_sock *ireq;
1205 	struct ipv6_pinfo *newnp;
1206 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1207 	struct ipv6_txoptions *opt;
1208 	struct inet_sock *newinet;
1209 	bool found_dup_sk = false;
1210 	struct tcp_sock *newtp;
1211 	struct sock *newsk;
1212 #ifdef CONFIG_TCP_MD5SIG
1213 	struct tcp_md5sig_key *key;
1214 	int l3index;
1215 #endif
1216 	struct flowi6 fl6;
1217 
1218 	if (skb->protocol == htons(ETH_P_IP)) {
1219 		/*
1220 		 *	v6 mapped
1221 		 */
1222 
1223 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1224 					     req_unhash, own_req);
1225 
1226 		if (!newsk)
1227 			return NULL;
1228 
1229 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1230 
1231 		newnp = tcp_inet6_sk(newsk);
1232 		newtp = tcp_sk(newsk);
1233 
1234 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1235 
1236 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1237 
1238 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1239 		if (sk_is_mptcp(newsk))
1240 			mptcpv6_handle_mapped(newsk, true);
1241 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1242 #ifdef CONFIG_TCP_MD5SIG
1243 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1244 #endif
1245 
1246 		newnp->ipv6_mc_list = NULL;
1247 		newnp->ipv6_ac_list = NULL;
1248 		newnp->ipv6_fl_list = NULL;
1249 		newnp->pktoptions  = NULL;
1250 		newnp->opt	   = NULL;
1251 		newnp->mcast_oif   = inet_iif(skb);
1252 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1253 		newnp->rcv_flowinfo = 0;
1254 		if (np->repflow)
1255 			newnp->flow_label = 0;
1256 
1257 		/*
1258 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1259 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1260 		 * that function for the gory details. -acme
1261 		 */
1262 
1263 		/* This is a tricky place. Until this moment IPv4 tcp
1264 		   worked with IPv6 icsk.icsk_af_ops.
1265 		   Sync it now.
1266 		 */
1267 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1268 
1269 		return newsk;
1270 	}
1271 
1272 	ireq = inet_rsk(req);
1273 
1274 	if (sk_acceptq_is_full(sk))
1275 		goto out_overflow;
1276 
1277 	if (!dst) {
1278 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1279 		if (!dst)
1280 			goto out;
1281 	}
1282 
1283 	newsk = tcp_create_openreq_child(sk, req, skb);
1284 	if (!newsk)
1285 		goto out_nonewsk;
1286 
1287 	/*
1288 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1289 	 * count here, tcp_create_openreq_child now does this for us, see the
1290 	 * comment in that function for the gory details. -acme
1291 	 */
1292 
1293 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1294 	ip6_dst_store(newsk, dst, NULL, NULL);
1295 	inet6_sk_rx_dst_set(newsk, skb);
1296 
1297 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1298 
1299 	newtp = tcp_sk(newsk);
1300 	newinet = inet_sk(newsk);
1301 	newnp = tcp_inet6_sk(newsk);
1302 
1303 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1304 
1305 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1306 	newnp->saddr = ireq->ir_v6_loc_addr;
1307 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1308 	newsk->sk_bound_dev_if = ireq->ir_iif;
1309 
1310 	/* Now IPv6 options...
1311 
1312 	   First: no IPv4 options.
1313 	 */
1314 	newinet->inet_opt = NULL;
1315 	newnp->ipv6_mc_list = NULL;
1316 	newnp->ipv6_ac_list = NULL;
1317 	newnp->ipv6_fl_list = NULL;
1318 
1319 	/* Clone RX bits */
1320 	newnp->rxopt.all = np->rxopt.all;
1321 
1322 	newnp->pktoptions = NULL;
1323 	newnp->opt	  = NULL;
1324 	newnp->mcast_oif  = tcp_v6_iif(skb);
1325 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1326 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1327 	if (np->repflow)
1328 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1329 
1330 	/* Set ToS of the new socket based upon the value of incoming SYN.
1331 	 * ECT bits are set later in tcp_init_transfer().
1332 	 */
1333 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1334 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1335 
1336 	/* Clone native IPv6 options from listening socket (if any)
1337 
1338 	   Yes, keeping a reference count would be much more clever,
1339 	   but we do one more thing here: reattach optmem
1340 	   to newsk.
1341 	 */
1342 	opt = ireq->ipv6_opt;
1343 	if (!opt)
1344 		opt = rcu_dereference(np->opt);
1345 	if (opt) {
1346 		opt = ipv6_dup_options(newsk, opt);
1347 		RCU_INIT_POINTER(newnp->opt, opt);
1348 	}
1349 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1350 	if (opt)
1351 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1352 						    opt->opt_flen;
1353 
1354 	tcp_ca_openreq_child(newsk, dst);
1355 
1356 	tcp_sync_mss(newsk, dst_mtu(dst));
1357 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1358 
1359 	tcp_initialize_rcv_mss(newsk);
1360 
1361 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1362 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1363 
1364 #ifdef CONFIG_TCP_MD5SIG
1365 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1366 
1367 	/* Copy over the MD5 key from the original socket */
1368 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1369 	if (key) {
1370 		/* We're using one, so create a matching key
1371 		 * on the newsk structure. If we fail to get
1372 		 * memory, then we end up not copying the key
1373 		 * across. Shucks.
1374 		 */
1375 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1376 			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1377 			       sk_gfp_mask(sk, GFP_ATOMIC));
1378 	}
1379 #endif
1380 
1381 	if (__inet_inherit_port(sk, newsk) < 0) {
1382 		inet_csk_prepare_forced_close(newsk);
1383 		tcp_done(newsk);
1384 		goto out;
1385 	}
1386 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1387 				       &found_dup_sk);
1388 	if (*own_req) {
1389 		tcp_move_syn(newtp, req);
1390 
1391 		/* Clone pktoptions received with SYN, if we own the req */
1392 		if (ireq->pktopts) {
1393 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1394 			consume_skb(ireq->pktopts);
1395 			ireq->pktopts = NULL;
1396 			if (newnp->pktoptions)
1397 				tcp_v6_restore_cb(newnp->pktoptions);
1398 		}
1399 	} else {
1400 		if (!req_unhash && found_dup_sk) {
1401 			/* This code path should only be executed in the
1402 			 * syncookie case
1403 			 */
1404 			bh_unlock_sock(newsk);
1405 			sock_put(newsk);
1406 			newsk = NULL;
1407 		}
1408 	}
1409 
1410 	return newsk;
1411 
1412 out_overflow:
1413 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1414 out_nonewsk:
1415 	dst_release(dst);
1416 out:
1417 	tcp_listendrop(sk);
1418 	return NULL;
1419 }
1420 
1421 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1422 							   u32));
1423 /* The socket must have its spinlock held when we get
1424  * here, unless it is a TCP_LISTEN socket.
1425  *
1426  * We have a potential double-lock case here, so even when
1427  * doing backlog processing we use the BH locking scheme.
1428  * This is because we cannot sleep with the original spinlock
1429  * held.
1430  */
1431 INDIRECT_CALLABLE_SCOPE
1432 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1433 {
1434 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1435 	struct sk_buff *opt_skb = NULL;
1436 	enum skb_drop_reason reason;
1437 	struct tcp_sock *tp;
1438 
1439 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1440 	   goes to the IPv4 receive handler and is backlogged.
1441 	   From the backlog it always goes here. Kerboom...
1442 	   Fortunately, tcp_rcv_established and rcv_established
1443 	   handle them correctly, but it is not the case with
1444 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1445 	 */
1446 
1447 	if (skb->protocol == htons(ETH_P_IP))
1448 		return tcp_v4_do_rcv(sk, skb);
1449 
1450 	/*
1451 	 *	socket locking is here for SMP purposes as backlog rcv
1452 	 *	is currently called with bh processing disabled.
1453 	 */
1454 
1455 	/* Do Stevens' IPV6_PKTOPTIONS.
1456 
1457 	   Yes, guys, it is the only place in our code where we
1458 	   can do this without affecting IPv4.
1459 	   The rest of the code is protocol independent,
1460 	   and I do not like the idea of uglifying IPv4.
1461 
1462 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1463 	   does not look very well thought out. For now we latch
1464 	   the options received in the last packet enqueued
1465 	   by tcp. Feel free to propose a better solution.
1466 					       --ANK (980728)
1467 	 */
1468 	if (np->rxopt.all)
1469 		opt_skb = skb_clone_and_charge_r(skb, sk);
1470 
1471 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1472 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1473 		struct dst_entry *dst;
1474 
1475 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1476 						lockdep_sock_is_held(sk));
1477 
1478 		sock_rps_save_rxhash(sk, skb);
1479 		sk_mark_napi_id(sk, skb);
1480 		if (dst) {
1481 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1482 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1483 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1484 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1485 				dst_release(dst);
1486 			}
1487 		}
1488 
1489 		tcp_rcv_established(sk, skb);
1490 		if (opt_skb)
1491 			goto ipv6_pktoptions;
1492 		return 0;
1493 	}
1494 
1495 	if (tcp_checksum_complete(skb))
1496 		goto csum_err;
1497 
1498 	if (sk->sk_state == TCP_LISTEN) {
1499 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1500 
1501 		if (!nsk)
1502 			goto discard;
1503 
1504 		if (nsk != sk) {
1505 			if (tcp_child_process(sk, nsk, skb))
1506 				goto reset;
1507 			if (opt_skb)
1508 				__kfree_skb(opt_skb);
1509 			return 0;
1510 		}
1511 	} else
1512 		sock_rps_save_rxhash(sk, skb);
1513 
1514 	if (tcp_rcv_state_process(sk, skb))
1515 		goto reset;
1516 	if (opt_skb)
1517 		goto ipv6_pktoptions;
1518 	return 0;
1519 
1520 reset:
1521 	tcp_v6_send_reset(sk, skb);
1522 discard:
1523 	if (opt_skb)
1524 		__kfree_skb(opt_skb);
1525 	kfree_skb_reason(skb, reason);
1526 	return 0;
1527 csum_err:
1528 	reason = SKB_DROP_REASON_TCP_CSUM;
1529 	trace_tcp_bad_csum(skb);
1530 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1531 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1532 	goto discard;
1533 
1534 
1535 ipv6_pktoptions:
1536 	/* You may ask, what is this?
1537 
1538 	   1. skb was enqueued by tcp.
1539 	   2. skb is added to the tail of the read queue, rather than out of order.
1540 	   3. the socket is not in a passive state.
1541 	   4. Finally, it really contains options which the user wants to receive.
1542 	 */
1543 	tp = tcp_sk(sk);
1544 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1545 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1546 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1547 			np->mcast_oif = tcp_v6_iif(opt_skb);
1548 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1549 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1550 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1551 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1552 		if (np->repflow)
1553 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1554 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1555 			tcp_v6_restore_cb(opt_skb);
1556 			opt_skb = xchg(&np->pktoptions, opt_skb);
1557 		} else {
1558 			__kfree_skb(opt_skb);
1559 			opt_skb = xchg(&np->pktoptions, NULL);
1560 		}
1561 	}
1562 
1563 	consume_skb(opt_skb);
1564 	return 0;
1565 }
1566 
1567 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1568 			   const struct tcphdr *th)
1569 {
1570 	/* This is tricky: we move IP6CB at its correct location into
1571 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1572 	 * _decode_session6() uses IP6CB().
1573 	 * barrier() makes sure compiler won't play aliasing games.
1574 	 */
1575 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1576 		sizeof(struct inet6_skb_parm));
1577 	barrier();
1578 
1579 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1580 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1581 				    skb->len - th->doff*4);
1582 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1583 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1584 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1585 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1586 	TCP_SKB_CB(skb)->sacked = 0;
1587 	TCP_SKB_CB(skb)->has_rxtstamp =
1588 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1589 }
1590 
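/* Main IPv6 TCP receive entry point: validate and checksum the segment,
 * look up the owning socket (with special handling for TCP_NEW_SYN_RECV
 * and TIME_WAIT), then either process it directly or queue it on the
 * socket backlog.
 */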
1591 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1592 {
1593 	enum skb_drop_reason drop_reason;
1594 	int sdif = inet6_sdif(skb);
1595 	int dif = inet6_iif(skb);
1596 	const struct tcphdr *th;
1597 	const struct ipv6hdr *hdr;
1598 	bool refcounted;
1599 	struct sock *sk;
1600 	int ret;
1601 	struct net *net = dev_net(skb->dev);
1602 
1603 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1604 	if (skb->pkt_type != PACKET_HOST)
1605 		goto discard_it;
1606 
1607 	/*
1608 	 *	Count it even if it's bad.
1609 	 */
1610 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1611 
1612 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1613 		goto discard_it;
1614 
1615 	th = (const struct tcphdr *)skb->data;
1616 
1617 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1618 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1619 		goto bad_packet;
1620 	}
1621 	if (!pskb_may_pull(skb, th->doff*4))
1622 		goto discard_it;
1623 
1624 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1625 		goto csum_error;
1626 
1627 	th = (const struct tcphdr *)skb->data;
1628 	hdr = ipv6_hdr(skb);
1629 
1630 lookup:
1631 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1632 				th->source, th->dest, inet6_iif(skb), sdif,
1633 				&refcounted);
1634 	if (!sk)
1635 		goto no_tcp_socket;
1636 
1637 process:
1638 	if (sk->sk_state == TCP_TIME_WAIT)
1639 		goto do_time_wait;
1640 
1641 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1642 		struct request_sock *req = inet_reqsk(sk);
1643 		bool req_stolen = false;
1644 		struct sock *nsk;
1645 
1646 		sk = req->rsk_listener;
1647 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1648 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1649 		else
1650 			drop_reason = tcp_inbound_md5_hash(sk, skb,
1651 							   &hdr->saddr, &hdr->daddr,
1652 							   AF_INET6, dif, sdif);
1653 		if (drop_reason) {
1654 			sk_drops_add(sk, skb);
1655 			reqsk_put(req);
1656 			goto discard_it;
1657 		}
1658 		if (tcp_checksum_complete(skb)) {
1659 			reqsk_put(req);
1660 			goto csum_error;
1661 		}
1662 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1664 			if (!nsk) {
1665 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1666 				goto lookup;
1667 			}
1668 			sk = nsk;
1669 			/* reuseport_migrate_sock() has already held one sk_refcnt
1670 			 * before returning.
1671 			 */
1672 		} else {
1673 			sock_hold(sk);
1674 		}
1675 		refcounted = true;
1676 		nsk = NULL;
1677 		if (!tcp_filter(sk, skb)) {
1678 			th = (const struct tcphdr *)skb->data;
1679 			hdr = ipv6_hdr(skb);
1680 			tcp_v6_fill_cb(skb, hdr, th);
1681 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1682 		} else {
1683 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1684 		}
1685 		if (!nsk) {
1686 			reqsk_put(req);
1687 			if (req_stolen) {
1688 				/* Another cpu got exclusive access to req
1689 				 * and created a full blown socket.
1690 				 * Try to feed this packet to this socket
1691 				 * instead of discarding it.
1692 				 */
1693 				tcp_v6_restore_cb(skb);
1694 				sock_put(sk);
1695 				goto lookup;
1696 			}
1697 			goto discard_and_relse;
1698 		}
1699 		nf_reset_ct(skb);
1700 		if (nsk == sk) {
1701 			reqsk_put(req);
1702 			tcp_v6_restore_cb(skb);
1703 		} else if (tcp_child_process(sk, nsk, skb)) {
1704 			tcp_v6_send_reset(nsk, skb);
1705 			goto discard_and_relse;
1706 		} else {
1707 			sock_put(sk);
1708 			return 0;
1709 		}
1710 	}
1711 
1712 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1713 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1714 		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1715 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1716 			goto discard_and_relse;
1717 		}
1718 	}
1719 
1720 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1721 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1722 		goto discard_and_relse;
1723 	}
1724 
1725 	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1726 					   AF_INET6, dif, sdif);
1727 	if (drop_reason)
1728 		goto discard_and_relse;
1729 
1730 	nf_reset_ct(skb);
1731 
1732 	if (tcp_filter(sk, skb)) {
1733 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1734 		goto discard_and_relse;
1735 	}
1736 	th = (const struct tcphdr *)skb->data;
1737 	hdr = ipv6_hdr(skb);
1738 	tcp_v6_fill_cb(skb, hdr, th);
1739 
1740 	skb->dev = NULL;
1741 
1742 	if (sk->sk_state == TCP_LISTEN) {
1743 		ret = tcp_v6_do_rcv(sk, skb);
1744 		goto put_and_return;
1745 	}
1746 
1747 	sk_incoming_cpu_update(sk);
1748 
1749 	bh_lock_sock_nested(sk);
1750 	tcp_segs_in(tcp_sk(sk), skb);
1751 	ret = 0;
1752 	if (!sock_owned_by_user(sk)) {
1753 		ret = tcp_v6_do_rcv(sk, skb);
1754 	} else {
1755 		if (tcp_add_backlog(sk, skb, &drop_reason))
1756 			goto discard_and_relse;
1757 	}
1758 	bh_unlock_sock(sk);
1759 put_and_return:
1760 	if (refcounted)
1761 		sock_put(sk);
1762 	return ret ? -1 : 0;
1763 
1764 no_tcp_socket:
1765 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1766 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1767 		goto discard_it;
1768 
1769 	tcp_v6_fill_cb(skb, hdr, th);
1770 
1771 	if (tcp_checksum_complete(skb)) {
1772 csum_error:
1773 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1774 		trace_tcp_bad_csum(skb);
1775 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1776 bad_packet:
1777 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1778 	} else {
1779 		tcp_v6_send_reset(NULL, skb);
1780 	}
1781 
1782 discard_it:
1783 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1784 	kfree_skb_reason(skb, drop_reason);
1785 	return 0;
1786 
1787 discard_and_relse:
1788 	sk_drops_add(sk, skb);
1789 	if (refcounted)
1790 		sock_put(sk);
1791 	goto discard_it;
1792 
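/* TIME_WAIT handling: after the policy and checksum checks,
 * tcp_timewait_state_process() tells us whether to treat the segment as a
 * new connection attempt (TCP_TW_SYN), answer it with an ACK (TCP_TW_ACK),
 * send a reset (TCP_TW_RST) or silently drop it (TCP_TW_SUCCESS).
 */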
1793 do_time_wait:
1794 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1795 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1796 		inet_twsk_put(inet_twsk(sk));
1797 		goto discard_it;
1798 	}
1799 
1800 	tcp_v6_fill_cb(skb, hdr, th);
1801 
1802 	if (tcp_checksum_complete(skb)) {
1803 		inet_twsk_put(inet_twsk(sk));
1804 		goto csum_error;
1805 	}
1806 
1807 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1808 	case TCP_TW_SYN:
1809 	{
1810 		struct sock *sk2;
1811 
1812 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1813 					    skb, __tcp_hdrlen(th),
1814 					    &ipv6_hdr(skb)->saddr, th->source,
1815 					    &ipv6_hdr(skb)->daddr,
1816 					    ntohs(th->dest),
1817 					    tcp_v6_iif_l3_slave(skb),
1818 					    sdif);
1819 		if (sk2) {
1820 			struct inet_timewait_sock *tw = inet_twsk(sk);
1821 			inet_twsk_deschedule_put(tw);
1822 			sk = sk2;
1823 			tcp_v6_restore_cb(skb);
1824 			refcounted = false;
1825 			goto process;
1826 		}
1827 	}
1828 		/* to ACK */
1829 		fallthrough;
1830 	case TCP_TW_ACK:
1831 		tcp_v6_timewait_ack(sk, skb);
1832 		break;
1833 	case TCP_TW_RST:
1834 		tcp_v6_send_reset(sk, skb);
1835 		inet_twsk_deschedule_put(inet_twsk(sk));
1836 		goto discard_it;
1837 	case TCP_TW_SUCCESS:
1838 		;
1839 	}
1840 	goto discard_it;
1841 }
1842 
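/* Early demux: called from the IPv6 receive path before the routing
 * decision.  If an established socket matches the segment, attach it to
 * the skb and, when the socket's cached rx dst is still valid for the
 * incoming interface, reuse that dst so the route lookup can be skipped.
 */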
1843 void tcp_v6_early_demux(struct sk_buff *skb)
1844 {
1845 	struct net *net = dev_net(skb->dev);
1846 	const struct ipv6hdr *hdr;
1847 	const struct tcphdr *th;
1848 	struct sock *sk;
1849 
1850 	if (skb->pkt_type != PACKET_HOST)
1851 		return;
1852 
1853 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1854 		return;
1855 
1856 	hdr = ipv6_hdr(skb);
1857 	th = tcp_hdr(skb);
1858 
1859 	if (th->doff < sizeof(struct tcphdr) / 4)
1860 		return;
1861 
1862 	/* Note: We use inet6_iif() here, not tcp_v6_iif() */
1863 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1864 					&hdr->saddr, th->source,
1865 					&hdr->daddr, ntohs(th->dest),
1866 					inet6_iif(skb), inet6_sdif(skb));
1867 	if (sk) {
1868 		skb->sk = sk;
1869 		skb->destructor = sock_edemux;
1870 		if (sk_fullsock(sk)) {
1871 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1872 
1873 			if (dst)
1874 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1875 			if (dst &&
1876 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1877 				skb_dst_set_noref(skb, dst);
1878 		}
1879 	}
1880 }
1881 
1882 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1883 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1884 	.twsk_unique	= tcp_twsk_unique,
1885 	.twsk_destructor = tcp_twsk_destructor,
1886 };
1887 
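/* Prepare the TCP checksum for a segment sent on a connected socket, using
 * the socket's own source/destination IPv6 addresses (typically a
 * pseudo-header sum handed off to checksum offload).
 */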
1888 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1889 {
1890 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1891 }
1892 
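/* Connection-level AF operations used by ordinary IPv6 TCP sockets;
 * tcp_v6_init_sock() installs them as icsk_af_ops.
 */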
1893 const struct inet_connection_sock_af_ops ipv6_specific = {
1894 	.queue_xmit	   = inet6_csk_xmit,
1895 	.send_check	   = tcp_v6_send_check,
1896 	.rebuild_header	   = inet6_sk_rebuild_header,
1897 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1898 	.conn_request	   = tcp_v6_conn_request,
1899 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1900 	.net_header_len	   = sizeof(struct ipv6hdr),
1901 	.net_frag_header_len = sizeof(struct frag_hdr),
1902 	.setsockopt	   = ipv6_setsockopt,
1903 	.getsockopt	   = ipv6_getsockopt,
1904 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1905 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1906 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1907 };
1908 
1909 #ifdef CONFIG_TCP_MD5SIG
1910 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1911 	.md5_lookup	=	tcp_v6_md5_lookup,
1912 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1913 	.md5_parse	=	tcp_v6_parse_md5_keys,
1914 };
1915 #endif
1916 
1917 /*
1918  *	TCP over IPv4 via INET6 API
1919  */
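/* These mapped ops are installed (by tcp_v6_connect() and
 * tcp_v6_syn_recv_sock() elsewhere in this file) when an AF_INET6 socket
 * actually carries IPv4 traffic via an IPv4-mapped address, so that
 * transmission uses the IPv4 output paths.
 *
 * Illustrative userspace sketch (not part of this file): an AF_INET6
 * socket connecting to an IPv4-mapped peer such as ::ffff:192.0.2.1 ends
 * up on these ops.
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 */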
1920 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1921 	.queue_xmit	   = ip_queue_xmit,
1922 	.send_check	   = tcp_v4_send_check,
1923 	.rebuild_header	   = inet_sk_rebuild_header,
1924 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1925 	.conn_request	   = tcp_v6_conn_request,
1926 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1927 	.net_header_len	   = sizeof(struct iphdr),
1928 	.setsockopt	   = ipv6_setsockopt,
1929 	.getsockopt	   = ipv6_getsockopt,
1930 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1931 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1932 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1933 };
1934 
1935 #ifdef CONFIG_TCP_MD5SIG
1936 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1937 	.md5_lookup	=	tcp_v4_md5_lookup,
1938 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1939 	.md5_parse	=	tcp_v6_parse_md5_keys,
1940 };
1941 #endif
1942 
1943 /* NOTE: A lot of things set to zero explicitly by call to
1944  *       sk_alloc() so need not be done here.
1945  */
1946 static int tcp_v6_init_sock(struct sock *sk)
1947 {
1948 	struct inet_connection_sock *icsk = inet_csk(sk);
1949 
1950 	tcp_init_sock(sk);
1951 
1952 	icsk->icsk_af_ops = &ipv6_specific;
1953 
1954 #ifdef CONFIG_TCP_MD5SIG
1955 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1956 #endif
1957 
1958 	return 0;
1959 }
1960 
1961 #ifdef CONFIG_PROC_FS
1962 /* Proc filesystem TCPv6 sock list dumping. */
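/* The three helpers below each format one line of /proc/net/tcp6: for a
 * pending connection request, a full socket and a TIME_WAIT socket
 * respectively.  The layout mirrors /proc/net/tcp, with the 128-bit
 * addresses printed as four 32-bit hex words.
 */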
1963 static void get_openreq6(struct seq_file *seq,
1964 			 const struct request_sock *req, int i)
1965 {
1966 	long ttd = req->rsk_timer.expires - jiffies;
1967 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1968 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1969 
1970 	if (ttd < 0)
1971 		ttd = 0;
1972 
1973 	seq_printf(seq,
1974 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1975 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1976 		   i,
1977 		   src->s6_addr32[0], src->s6_addr32[1],
1978 		   src->s6_addr32[2], src->s6_addr32[3],
1979 		   inet_rsk(req)->ir_num,
1980 		   dest->s6_addr32[0], dest->s6_addr32[1],
1981 		   dest->s6_addr32[2], dest->s6_addr32[3],
1982 		   ntohs(inet_rsk(req)->ir_rmt_port),
1983 		   TCP_SYN_RECV,
1984 		   0, 0, /* could print option size, but that is af dependent. */
1985 		   1,   /* timers active (only the expire timer) */
1986 		   jiffies_to_clock_t(ttd),
1987 		   req->num_timeout,
1988 		   from_kuid_munged(seq_user_ns(seq),
1989 				    sock_i_uid(req->rsk_listener)),
1990 		   0,  /* non standard timer */
1991 		   0, /* open_requests have no inode */
1992 		   0, req);
1993 }
1994 
1995 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1996 {
1997 	const struct in6_addr *dest, *src;
1998 	__u16 destp, srcp;
1999 	int timer_active;
2000 	unsigned long timer_expires;
2001 	const struct inet_sock *inet = inet_sk(sp);
2002 	const struct tcp_sock *tp = tcp_sk(sp);
2003 	const struct inet_connection_sock *icsk = inet_csk(sp);
2004 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2005 	int rx_queue;
2006 	int state;
2007 
2008 	dest  = &sp->sk_v6_daddr;
2009 	src   = &sp->sk_v6_rcv_saddr;
2010 	destp = ntohs(inet->inet_dport);
2011 	srcp  = ntohs(inet->inet_sport);
2012 
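	/* timer_active encodings match /proc/net/tcp: 1 retransmit/loss-probe
	 * timer, 2 another timer such as keepalive (sk_timer), 4 zero window
	 * probe timer, 0 no timer pending (TIME_WAIT sockets report 3).
	 */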
2013 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2014 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2015 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2016 		timer_active	= 1;
2017 		timer_expires	= icsk->icsk_timeout;
2018 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2019 		timer_active	= 4;
2020 		timer_expires	= icsk->icsk_timeout;
2021 	} else if (timer_pending(&sp->sk_timer)) {
2022 		timer_active	= 2;
2023 		timer_expires	= sp->sk_timer.expires;
2024 	} else {
2025 		timer_active	= 0;
2026 		timer_expires = jiffies;
2027 	}
2028 
2029 	state = inet_sk_state_load(sp);
2030 	if (state == TCP_LISTEN)
2031 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2032 	else
2033 		/* Because we don't lock the socket,
2034 		 * we might find a transient negative value.
2035 		 */
2036 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2037 				      READ_ONCE(tp->copied_seq), 0);
2038 
2039 	seq_printf(seq,
2040 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2041 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2042 		   i,
2043 		   src->s6_addr32[0], src->s6_addr32[1],
2044 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2045 		   dest->s6_addr32[0], dest->s6_addr32[1],
2046 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2047 		   state,
2048 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2049 		   rx_queue,
2050 		   timer_active,
2051 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2052 		   icsk->icsk_retransmits,
2053 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2054 		   icsk->icsk_probes_out,
2055 		   sock_i_ino(sp),
2056 		   refcount_read(&sp->sk_refcnt), sp,
2057 		   jiffies_to_clock_t(icsk->icsk_rto),
2058 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2059 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2060 		   tcp_snd_cwnd(tp),
2061 		   state == TCP_LISTEN ?
2062 			fastopenq->max_qlen :
2063 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2064 		   );
2065 }
2066 
2067 static void get_timewait6_sock(struct seq_file *seq,
2068 			       struct inet_timewait_sock *tw, int i)
2069 {
2070 	long delta = tw->tw_timer.expires - jiffies;
2071 	const struct in6_addr *dest, *src;
2072 	__u16 destp, srcp;
2073 
2074 	dest = &tw->tw_v6_daddr;
2075 	src  = &tw->tw_v6_rcv_saddr;
2076 	destp = ntohs(tw->tw_dport);
2077 	srcp  = ntohs(tw->tw_sport);
2078 
2079 	seq_printf(seq,
2080 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2081 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2082 		   i,
2083 		   src->s6_addr32[0], src->s6_addr32[1],
2084 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2085 		   dest->s6_addr32[0], dest->s6_addr32[1],
2086 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2087 		   tw->tw_substate, 0, 0,
2088 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2089 		   refcount_read(&tw->tw_refcnt), tw);
2090 }
2091 
2092 static int tcp6_seq_show(struct seq_file *seq, void *v)
2093 {
2094 	struct tcp_iter_state *st;
2095 	struct sock *sk = v;
2096 
2097 	if (v == SEQ_START_TOKEN) {
2098 		seq_puts(seq,
2099 			 "  sl  "
2100 			 "local_address                         "
2101 			 "remote_address                        "
2102 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2103 			 "   uid  timeout inode\n");
2104 		goto out;
2105 	}
2106 	st = seq->private;
2107 
2108 	if (sk->sk_state == TCP_TIME_WAIT)
2109 		get_timewait6_sock(seq, v, st->num);
2110 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2111 		get_openreq6(seq, v, st->num);
2112 	else
2113 		get_tcp6_sock(seq, v, st->num);
2114 out:
2115 	return 0;
2116 }
2117 
2118 static const struct seq_operations tcp6_seq_ops = {
2119 	.show		= tcp6_seq_show,
2120 	.start		= tcp_seq_start,
2121 	.next		= tcp_seq_next,
2122 	.stop		= tcp_seq_stop,
2123 };
2124 
2125 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2126 	.family		= AF_INET6,
2127 };
2128 
2129 int __net_init tcp6_proc_init(struct net *net)
2130 {
2131 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2132 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2133 		return -ENOMEM;
2134 	return 0;
2135 }
2136 
2137 void tcp6_proc_exit(struct net *net)
2138 {
2139 	remove_proc_entry("tcp6", net->proc_net);
2140 }
2141 #endif
2142 
2143 struct proto tcpv6_prot = {
2144 	.name			= "TCPv6",
2145 	.owner			= THIS_MODULE,
2146 	.close			= tcp_close,
2147 	.pre_connect		= tcp_v6_pre_connect,
2148 	.connect		= tcp_v6_connect,
2149 	.disconnect		= tcp_disconnect,
2150 	.accept			= inet_csk_accept,
2151 	.ioctl			= tcp_ioctl,
2152 	.init			= tcp_v6_init_sock,
2153 	.destroy		= tcp_v4_destroy_sock,
2154 	.shutdown		= tcp_shutdown,
2155 	.setsockopt		= tcp_setsockopt,
2156 	.getsockopt		= tcp_getsockopt,
2157 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2158 	.keepalive		= tcp_set_keepalive,
2159 	.recvmsg		= tcp_recvmsg,
2160 	.sendmsg		= tcp_sendmsg,
2161 	.sendpage		= tcp_sendpage,
2162 	.backlog_rcv		= tcp_v6_do_rcv,
2163 	.release_cb		= tcp_release_cb,
2164 	.hash			= inet6_hash,
2165 	.unhash			= inet_unhash,
2166 	.get_port		= inet_csk_get_port,
2167 	.put_port		= inet_put_port,
2168 #ifdef CONFIG_BPF_SYSCALL
2169 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2170 #endif
2171 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2172 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2173 	.stream_memory_free	= tcp_stream_memory_free,
2174 	.sockets_allocated	= &tcp_sockets_allocated,
2175 
2176 	.memory_allocated	= &tcp_memory_allocated,
2177 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2178 
2179 	.memory_pressure	= &tcp_memory_pressure,
2180 	.orphan_count		= &tcp_orphan_count,
2181 	.sysctl_mem		= sysctl_tcp_mem,
2182 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2183 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2184 	.max_header		= MAX_TCP_HEADER,
2185 	.obj_size		= sizeof(struct tcp6_sock),
2186 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2187 	.twsk_prot		= &tcp6_timewait_sock_ops,
2188 	.rsk_prot		= &tcp6_request_sock_ops,
2189 	.h.hashinfo		= NULL,
2190 	.no_autobind		= true,
2191 	.diag_destroy		= tcp_abort,
2192 };
2193 EXPORT_SYMBOL_GPL(tcpv6_prot);
2194 
2195 static const struct inet6_protocol tcpv6_protocol = {
2196 	.handler	=	tcp_v6_rcv,
2197 	.err_handler	=	tcp_v6_err,
2198 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2199 };
2200 
2201 static struct inet_protosw tcpv6_protosw = {
2202 	.type		=	SOCK_STREAM,
2203 	.protocol	=	IPPROTO_TCP,
2204 	.prot		=	&tcpv6_prot,
2205 	.ops		=	&inet6_stream_ops,
2206 	.flags		=	INET_PROTOSW_PERMANENT |
2207 				INET_PROTOSW_ICSK,
2208 };
2209 
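/* Per-netns setup: create the control socket used to transmit stack
 * generated segments (e.g. RSTs and TIME_WAIT ACKs) that are not tied to a
 * full socket.
 */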
2210 static int __net_init tcpv6_net_init(struct net *net)
2211 {
2212 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2213 				    SOCK_RAW, IPPROTO_TCP, net);
2214 }
2215 
2216 static void __net_exit tcpv6_net_exit(struct net *net)
2217 {
2218 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2219 }
2220 
2221 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2222 {
2223 	tcp_twsk_purge(net_exit_list, AF_INET6);
2224 }
2225 
2226 static struct pernet_operations tcpv6_net_ops = {
2227 	.init	    = tcpv6_net_init,
2228 	.exit	    = tcpv6_net_exit,
2229 	.exit_batch = tcpv6_net_exit_batch,
2230 };
2231 
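/* Boot-time init: register the IPPROTO_TCP input handler, the SOCK_STREAM
 * protosw entry, the per-netns operations and MPTCPv6.  On failure each
 * step unwinds the earlier registrations in reverse order.
 */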
2232 int __init tcpv6_init(void)
2233 {
2234 	int ret;
2235 
2236 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2237 	if (ret)
2238 		goto out;
2239 
2240 	/* register inet6 protocol */
2241 	ret = inet6_register_protosw(&tcpv6_protosw);
2242 	if (ret)
2243 		goto out_tcpv6_protocol;
2244 
2245 	ret = register_pernet_subsys(&tcpv6_net_ops);
2246 	if (ret)
2247 		goto out_tcpv6_protosw;
2248 
2249 	ret = mptcpv6_init();
2250 	if (ret)
2251 		goto out_tcpv6_pernet_subsys;
2252 
2253 out:
2254 	return ret;
2255 
2256 out_tcpv6_pernet_subsys:
2257 	unregister_pernet_subsys(&tcpv6_net_ops);
2258 out_tcpv6_protosw:
2259 	inet6_unregister_protosw(&tcpv6_protosw);
2260 out_tcpv6_protocol:
2261 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2262 	goto out;
2263 }
2264 
2265 void tcpv6_exit(void)
2266 {
2267 	unregister_pernet_subsys(&tcpv6_net_ops);
2268 	inet6_unregister_protosw(&tcpv6_protosw);
2269 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2270 }
2271