1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		sk->sk_rx_dst_ifindex = skb->skb_iif;
112 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
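/* The two helpers below are wired into tcp_request_sock_ipv6_ops further
 * down: the initial sequence number and the timestamp offset for passive
 * opens are derived from the address/port 4-tuple via keyed hashes, so
 * each flow gets values an off-path host cannot predict.
 */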
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
273 	fl6.flowi6_oif = sk->sk_bound_dev_if;
274 	fl6.flowi6_mark = sk->sk_mark;
275 	fl6.fl6_dport = usin->sin6_port;
276 	fl6.fl6_sport = inet->inet_sport;
277 	fl6.flowi6_uid = sk->sk_uid;
278 
279 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280 	final_p = fl6_update_dst(&fl6, opt, &final);
281 
282 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283 
284 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
285 	if (IS_ERR(dst)) {
286 		err = PTR_ERR(dst);
287 		goto failure;
288 	}
289 
290 	if (!saddr) {
291 		saddr = &fl6.saddr;
292 		sk->sk_v6_rcv_saddr = *saddr;
293 	}
294 
295 	/* set the source address */
296 	np->saddr = *saddr;
297 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298 
299 	sk->sk_gso_type = SKB_GSO_TCPV6;
300 	ip6_dst_store(sk, dst, NULL, NULL);
301 
302 	icsk->icsk_ext_hdr_len = 0;
303 	if (opt)
304 		icsk->icsk_ext_hdr_len = opt->opt_flen +
305 					 opt->opt_nflen;
306 
307 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308 
309 	inet->inet_dport = usin->sin6_port;
310 
311 	tcp_set_state(sk, TCP_SYN_SENT);
312 	err = inet6_hash_connect(tcp_death_row, sk);
313 	if (err)
314 		goto late_failure;
315 
316 	sk_set_txhash(sk);
317 
318 	if (likely(!tp->repair)) {
319 		if (!tp->write_seq)
320 			WRITE_ONCE(tp->write_seq,
321 				   secure_tcpv6_seq(np->saddr.s6_addr32,
322 						    sk->sk_v6_daddr.s6_addr32,
323 						    inet->inet_sport,
324 						    inet->inet_dport));
325 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 						   np->saddr.s6_addr32,
327 						   sk->sk_v6_daddr.s6_addr32);
328 	}
329 
330 	if (tcp_fastopen_defer_connect(sk, &err))
331 		return err;
332 	if (err)
333 		goto late_failure;
334 
335 	err = tcp_connect(sk);
336 	if (err)
337 		goto late_failure;
338 
339 	return 0;
340 
341 late_failure:
342 	tcp_set_state(sk, TCP_CLOSE);
343 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
344 		inet_reset_saddr(sk);
345 failure:
346 	inet->inet_dport = 0;
347 	sk->sk_route_caps = 0;
348 	return err;
349 }
350 
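/* Apply a PMTU reduction signalled by an ICMPv6 Packet Too Big message.
 * tp->mtu_info carries the new MTU; updates that would not shrink the
 * current MSS are ignored, and the work may be deferred until the socket
 * lock is released when the socket was owned by the user at ICMP time.
 */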
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 	struct dst_entry *dst;
354 	u32 mtu;
355 
356 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 		return;
358 
359 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360 
361 	/* Drop requests trying to increase our current mss.
362 	 * Check done in __ip6_rt_update_pmtu() is too late.
363 	 */
364 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 		return;
366 
367 	dst = inet6_csk_update_pmtu(sk, mtu);
368 	if (!dst)
369 		return;
370 
371 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 		tcp_sync_mss(sk, dst_mtu(dst));
373 		tcp_simple_retransmit(sk);
374 	}
375 }
376 
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 		u8 type, u8 code, int offset, __be32 info)
379 {
380 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 	struct net *net = dev_net(skb->dev);
383 	struct request_sock *fastopen;
384 	struct ipv6_pinfo *np;
385 	struct tcp_sock *tp;
386 	__u32 seq, snd_una;
387 	struct sock *sk;
388 	bool fatal;
389 	int err;
390 
391 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
392 					&hdr->daddr, th->dest,
393 					&hdr->saddr, ntohs(th->source),
394 					skb->dev->ifindex, inet6_sdif(skb));
395 
396 	if (!sk) {
397 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 				  ICMP6_MIB_INERRORS);
399 		return -ENOENT;
400 	}
401 
402 	if (sk->sk_state == TCP_TIME_WAIT) {
403 		inet_twsk_put(inet_twsk(sk));
404 		return 0;
405 	}
406 	seq = ntohl(th->seq);
407 	fatal = icmpv6_err_convert(type, code, &err);
408 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 		tcp_req_err(sk, seq, fatal);
410 		return 0;
411 	}
412 
413 	bh_lock_sock(sk);
414 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416 
417 	if (sk->sk_state == TCP_CLOSE)
418 		goto out;
419 
420 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
421 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422 		goto out;
423 	}
424 
425 	tp = tcp_sk(sk);
426 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
427 	fastopen = rcu_dereference(tp->fastopen_rsk);
428 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
429 	if (sk->sk_state != TCP_LISTEN &&
430 	    !between(seq, snd_una, tp->snd_nxt)) {
431 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432 		goto out;
433 	}
434 
435 	np = tcp_inet6_sk(sk);
436 
437 	if (type == NDISC_REDIRECT) {
438 		if (!sock_owned_by_user(sk)) {
439 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
440 
441 			if (dst)
442 				dst->ops->redirect(dst, sk, skb);
443 		}
444 		goto out;
445 	}
446 
447 	if (type == ICMPV6_PKT_TOOBIG) {
448 		u32 mtu = ntohl(info);
449 
450 		/* We are not interested in TCP_LISTEN and open_requests
451 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
452 		 * they should go through unfragmented).
453 		 */
454 		if (sk->sk_state == TCP_LISTEN)
455 			goto out;
456 
457 		if (!ip6_sk_accept_pmtu(sk))
458 			goto out;
459 
460 		if (mtu < IPV6_MIN_MTU)
461 			goto out;
462 
463 		WRITE_ONCE(tp->mtu_info, mtu);
464 
465 		if (!sock_owned_by_user(sk))
466 			tcp_v6_mtu_reduced(sk);
467 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
468 					   &sk->sk_tsq_flags))
469 			sock_hold(sk);
470 		goto out;
471 	}
472 
473 
474 	/* Might be for a request_sock */
475 	switch (sk->sk_state) {
476 	case TCP_SYN_SENT:
477 	case TCP_SYN_RECV:
478 		/* Only in fast or simultaneous open. If a fast open socket is
479 		 * already accepted it is treated as a connected one below.
480 		 */
481 		if (fastopen && !fastopen->sk)
482 			break;
483 
484 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
485 
486 		if (!sock_owned_by_user(sk)) {
487 			sk->sk_err = err;
488 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
489 
490 			tcp_done(sk);
491 		} else
492 			sk->sk_err_soft = err;
493 		goto out;
494 	case TCP_LISTEN:
495 		break;
496 	default:
497 		/* check if this ICMP message allows revert of backoff.
498 		 * (see RFC 6069)
499 		 */
500 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 		    code == ICMPV6_NOROUTE)
502 			tcp_ld_RTO_revert(sk, seq);
503 	}
504 
505 	if (!sock_owned_by_user(sk) && np->recverr) {
506 		sk->sk_err = err;
507 		sk_error_report(sk);
508 	} else
509 		sk->sk_err_soft = err;
510 
511 out:
512 	bh_unlock_sock(sk);
513 	sock_put(sk);
514 	return 0;
515 }
516 
517 
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 			      struct flowi *fl,
520 			      struct request_sock *req,
521 			      struct tcp_fastopen_cookie *foc,
522 			      enum tcp_synack_type synack_type,
523 			      struct sk_buff *syn_skb)
524 {
525 	struct inet_request_sock *ireq = inet_rsk(req);
526 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 	struct ipv6_txoptions *opt;
528 	struct flowi6 *fl6 = &fl->u.ip6;
529 	struct sk_buff *skb;
530 	int err = -ENOMEM;
531 	u8 tclass;
532 
533 	/* First, grab a route. */
534 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 					       IPPROTO_TCP)) == NULL)
536 		goto done;
537 
538 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539 
540 	if (skb) {
541 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 				    &ireq->ir_v6_rmt_addr);
543 
544 		fl6->daddr = ireq->ir_v6_rmt_addr;
545 		if (np->repflow && ireq->pktopts)
546 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547 
548 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 				(np->tclass & INET_ECN_MASK) :
551 				np->tclass;
552 
553 		if (!INET_ECN_is_capable(tclass) &&
554 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
555 			tclass |= INET_ECN_ECT_0;
556 
557 		rcu_read_lock();
558 		opt = ireq->ipv6_opt;
559 		if (!opt)
560 			opt = rcu_dereference(np->opt);
561 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
562 			       tclass, sk->sk_priority);
563 		rcu_read_unlock();
564 		err = net_xmit_eval(err);
565 	}
566 
567 done:
568 	return err;
569 }
570 
571 
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 	kfree(inet_rsk(req)->ipv6_opt);
575 	kfree_skb(inet_rsk(req)->pktopts);
576 }
577 
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 						   const struct in6_addr *addr,
581 						   int l3index)
582 {
583 	return tcp_md5_do_lookup(sk, l3index,
584 				 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586 
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 						const struct sock *addr_sk)
589 {
590 	int l3index;
591 
592 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 						 addr_sk->sk_bound_dev_if);
594 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 				    l3index);
596 }
597 
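/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: adds or removes an MD5
 * key for a peer.  V4-mapped peers are stored as AF_INET keys (using the
 * low 32 bits of the address) so the key is also found on the IPv4 path.
 */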
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 				 sockptr_t optval, int optlen)
600 {
601 	struct tcp_md5sig cmd;
602 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 	int l3index = 0;
604 	u8 prefixlen;
605 	u8 flags;
606 
607 	if (optlen < sizeof(cmd))
608 		return -EINVAL;
609 
610 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 		return -EFAULT;
612 
613 	if (sin6->sin6_family != AF_INET6)
614 		return -EINVAL;
615 
616 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
617 
618 	if (optname == TCP_MD5SIG_EXT &&
619 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
620 		prefixlen = cmd.tcpm_prefixlen;
621 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
622 					prefixlen > 32))
623 			return -EINVAL;
624 	} else {
625 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
626 	}
627 
628 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
629 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
630 		struct net_device *dev;
631 
632 		rcu_read_lock();
633 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
634 		if (dev && netif_is_l3_master(dev))
635 			l3index = dev->ifindex;
636 		rcu_read_unlock();
637 
638 		/* ok to reference set/not set outside of rcu;
639 		 * right now device MUST be an L3 master
640 		 */
641 		if (!dev || !l3index)
642 			return -EINVAL;
643 	}
644 
645 	if (!cmd.tcpm_keylen) {
646 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
647 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
648 					      AF_INET, prefixlen,
649 					      l3index, flags);
650 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
651 				      AF_INET6, prefixlen, l3index, flags);
652 	}
653 
654 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
655 		return -EINVAL;
656 
657 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 				      AF_INET, prefixlen, l3index, flags,
660 				      cmd.tcpm_key, cmd.tcpm_keylen,
661 				      GFP_KERNEL);
662 
663 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
664 			      AF_INET6, prefixlen, l3index, flags,
665 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
666 }
667 
668 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
669 				   const struct in6_addr *daddr,
670 				   const struct in6_addr *saddr,
671 				   const struct tcphdr *th, int nbytes)
672 {
673 	struct tcp6_pseudohdr *bp;
674 	struct scatterlist sg;
675 	struct tcphdr *_th;
676 
677 	bp = hp->scratch;
678 	/* 1. TCP pseudo-header (RFC2460) */
679 	bp->saddr = *saddr;
680 	bp->daddr = *daddr;
681 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
682 	bp->len = cpu_to_be32(nbytes);
683 
684 	_th = (struct tcphdr *)(bp + 1);
685 	memcpy(_th, th, sizeof(*th));
686 	_th->check = 0;
687 
688 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
689 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
690 				sizeof(*bp) + sizeof(*th));
691 	return crypto_ahash_update(hp->md5_req);
692 }
693 
694 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
695 			       const struct in6_addr *daddr, struct in6_addr *saddr,
696 			       const struct tcphdr *th)
697 {
698 	struct tcp_md5sig_pool *hp;
699 	struct ahash_request *req;
700 
701 	hp = tcp_get_md5sig_pool();
702 	if (!hp)
703 		goto clear_hash_noput;
704 	req = hp->md5_req;
705 
706 	if (crypto_ahash_init(req))
707 		goto clear_hash;
708 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
709 		goto clear_hash;
710 	if (tcp_md5_hash_key(hp, key))
711 		goto clear_hash;
712 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
713 	if (crypto_ahash_final(req))
714 		goto clear_hash;
715 
716 	tcp_put_md5sig_pool();
717 	return 0;
718 
719 clear_hash:
720 	tcp_put_md5sig_pool();
721 clear_hash_noput:
722 	memset(md5_hash, 0, 16);
723 	return 1;
724 }
725 
726 static int tcp_v6_md5_hash_skb(char *md5_hash,
727 			       const struct tcp_md5sig_key *key,
728 			       const struct sock *sk,
729 			       const struct sk_buff *skb)
730 {
731 	const struct in6_addr *saddr, *daddr;
732 	struct tcp_md5sig_pool *hp;
733 	struct ahash_request *req;
734 	const struct tcphdr *th = tcp_hdr(skb);
735 
736 	if (sk) { /* valid for establish/request sockets */
737 		saddr = &sk->sk_v6_rcv_saddr;
738 		daddr = &sk->sk_v6_daddr;
739 	} else {
740 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
741 		saddr = &ip6h->saddr;
742 		daddr = &ip6h->daddr;
743 	}
744 
745 	hp = tcp_get_md5sig_pool();
746 	if (!hp)
747 		goto clear_hash_noput;
748 	req = hp->md5_req;
749 
750 	if (crypto_ahash_init(req))
751 		goto clear_hash;
752 
753 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
754 		goto clear_hash;
755 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
756 		goto clear_hash;
757 	if (tcp_md5_hash_key(hp, key))
758 		goto clear_hash;
759 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
760 	if (crypto_ahash_final(req))
761 		goto clear_hash;
762 
763 	tcp_put_md5sig_pool();
764 	return 0;
765 
766 clear_hash:
767 	tcp_put_md5sig_pool();
768 clear_hash_noput:
769 	memset(md5_hash, 0, 16);
770 	return 1;
771 }
772 
773 #endif
774 
775 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
776 				    const struct sk_buff *skb,
777 				    int dif, int sdif)
778 {
779 #ifdef CONFIG_TCP_MD5SIG
780 	const __u8 *hash_location = NULL;
781 	struct tcp_md5sig_key *hash_expected;
782 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
783 	const struct tcphdr *th = tcp_hdr(skb);
784 	int genhash, l3index;
785 	u8 newhash[16];
786 
787 	/* sdif set, means packet ingressed via a device
788 	 * in an L3 domain and dif is set to the l3mdev
789 	 */
790 	l3index = sdif ? dif : 0;
791 
792 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
793 	hash_location = tcp_parse_md5sig_option(th);
794 
795 	/* We've parsed the options - do we have a hash? */
796 	if (!hash_expected && !hash_location)
797 		return false;
798 
799 	if (hash_expected && !hash_location) {
800 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
801 		return true;
802 	}
803 
804 	if (!hash_expected && hash_location) {
805 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
806 		return true;
807 	}
808 
809 	/* check the signature */
810 	genhash = tcp_v6_md5_hash_skb(newhash,
811 				      hash_expected,
812 				      NULL, skb);
813 
814 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
815 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
816 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
817 				     genhash ? "failed" : "mismatch",
818 				     &ip6h->saddr, ntohs(th->source),
819 				     &ip6h->daddr, ntohs(th->dest), l3index);
820 		return true;
821 	}
822 #endif
823 	return false;
824 }
825 
826 static void tcp_v6_init_req(struct request_sock *req,
827 			    const struct sock *sk_listener,
828 			    struct sk_buff *skb)
829 {
830 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
831 	struct inet_request_sock *ireq = inet_rsk(req);
832 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
833 
834 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
835 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
836 
837 	/* So that link locals have meaning */
838 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
839 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
840 		ireq->ir_iif = tcp_v6_iif(skb);
841 
842 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
843 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
844 	     np->rxopt.bits.rxinfo ||
845 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
846 	     np->rxopt.bits.rxohlim || np->repflow)) {
847 		refcount_inc(&skb->users);
848 		ireq->pktopts = skb;
849 	}
850 }
851 
852 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
853 					  struct sk_buff *skb,
854 					  struct flowi *fl,
855 					  struct request_sock *req)
856 {
857 	tcp_v6_init_req(req, sk, skb);
858 
859 	if (security_inet_conn_request(sk, skb, req))
860 		return NULL;
861 
862 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
863 }
864 
865 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
866 	.family		=	AF_INET6,
867 	.obj_size	=	sizeof(struct tcp6_request_sock),
868 	.rtx_syn_ack	=	tcp_rtx_synack,
869 	.send_ack	=	tcp_v6_reqsk_send_ack,
870 	.destructor	=	tcp_v6_reqsk_destructor,
871 	.send_reset	=	tcp_v6_send_reset,
872 	.syn_ack_timeout =	tcp_syn_ack_timeout,
873 };
874 
875 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
876 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
877 				sizeof(struct ipv6hdr),
878 #ifdef CONFIG_TCP_MD5SIG
879 	.req_md5_lookup	=	tcp_v6_md5_lookup,
880 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
881 #endif
882 #ifdef CONFIG_SYN_COOKIES
883 	.cookie_init_seq =	cookie_v6_init_sequence,
884 #endif
885 	.route_req	=	tcp_v6_route_req,
886 	.init_seq	=	tcp_v6_init_seq,
887 	.init_ts_off	=	tcp_v6_init_ts_off,
888 	.send_synack	=	tcp_v6_send_synack,
889 };
890 
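/* Build and send a bare control segment (RST or ACK) in reply to @skb.
 * The reply goes out through the per-netns control socket, so this also
 * works when @sk is NULL, a request socket or a timewait socket.
 */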
891 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
892 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
893 				 int oif, struct tcp_md5sig_key *key, int rst,
894 				 u8 tclass, __be32 label, u32 priority)
895 {
896 	const struct tcphdr *th = tcp_hdr(skb);
897 	struct tcphdr *t1;
898 	struct sk_buff *buff;
899 	struct flowi6 fl6;
900 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
901 	struct sock *ctl_sk = net->ipv6.tcp_sk;
902 	unsigned int tot_len = sizeof(struct tcphdr);
903 	__be32 mrst = 0, *topt;
904 	struct dst_entry *dst;
905 	__u32 mark = 0;
906 
907 	if (tsecr)
908 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
909 #ifdef CONFIG_TCP_MD5SIG
910 	if (key)
911 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
912 #endif
913 
914 #ifdef CONFIG_MPTCP
915 	if (rst && !key) {
916 		mrst = mptcp_reset_option(skb);
917 
918 		if (mrst)
919 			tot_len += sizeof(__be32);
920 	}
921 #endif
922 
923 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
924 			 GFP_ATOMIC);
925 	if (!buff)
926 		return;
927 
928 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
929 
930 	t1 = skb_push(buff, tot_len);
931 	skb_reset_transport_header(buff);
932 
933 	/* Swap the send and the receive. */
934 	memset(t1, 0, sizeof(*t1));
935 	t1->dest = th->source;
936 	t1->source = th->dest;
937 	t1->doff = tot_len / 4;
938 	t1->seq = htonl(seq);
939 	t1->ack_seq = htonl(ack);
940 	t1->ack = !rst || !th->ack;
941 	t1->rst = rst;
942 	t1->window = htons(win);
943 
944 	topt = (__be32 *)(t1 + 1);
945 
946 	if (tsecr) {
947 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
948 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
949 		*topt++ = htonl(tsval);
950 		*topt++ = htonl(tsecr);
951 	}
952 
953 	if (mrst)
954 		*topt++ = mrst;
955 
956 #ifdef CONFIG_TCP_MD5SIG
957 	if (key) {
958 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
959 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
960 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
961 				    &ipv6_hdr(skb)->saddr,
962 				    &ipv6_hdr(skb)->daddr, t1);
963 	}
964 #endif
965 
966 	memset(&fl6, 0, sizeof(fl6));
967 	fl6.daddr = ipv6_hdr(skb)->saddr;
968 	fl6.saddr = ipv6_hdr(skb)->daddr;
969 	fl6.flowlabel = label;
970 
971 	buff->ip_summed = CHECKSUM_PARTIAL;
972 	buff->csum = 0;
973 
974 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
975 
976 	fl6.flowi6_proto = IPPROTO_TCP;
977 	if (rt6_need_strict(&fl6.daddr) && !oif)
978 		fl6.flowi6_oif = tcp_v6_iif(skb);
979 	else {
980 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
981 			oif = skb->skb_iif;
982 
983 		fl6.flowi6_oif = oif;
984 	}
985 
986 	if (sk) {
987 		if (sk->sk_state == TCP_TIME_WAIT) {
988 			mark = inet_twsk(sk)->tw_mark;
989 			/* autoflowlabel relies on buff->hash */
990 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
991 				     PKT_HASH_TYPE_L4);
992 		} else {
993 			mark = sk->sk_mark;
994 		}
995 		buff->tstamp = tcp_transmit_time(sk);
996 	}
997 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
998 	fl6.fl6_dport = t1->dest;
999 	fl6.fl6_sport = t1->source;
1000 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
1001 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
1002 
1003 	/* Pass a socket to ip6_dst_lookup_flow even when it is for a RST.
1004 	 * The underlying function will use it to retrieve the network
1005 	 * namespace.
1006 	 */
1007 	if (sk && sk->sk_state != TCP_TIME_WAIT)
1008 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
1009 	else
1010 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
1011 	if (!IS_ERR(dst)) {
1012 		skb_dst_set(buff, dst);
1013 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1014 			 tclass & ~INET_ECN_MASK, priority);
1015 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1016 		if (rst)
1017 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1018 		return;
1019 	}
1020 
1021 	kfree_skb(buff);
1022 }
1023 
1024 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1025 {
1026 	const struct tcphdr *th = tcp_hdr(skb);
1027 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1028 	u32 seq = 0, ack_seq = 0;
1029 	struct tcp_md5sig_key *key = NULL;
1030 #ifdef CONFIG_TCP_MD5SIG
1031 	const __u8 *hash_location = NULL;
1032 	unsigned char newhash[16];
1033 	int genhash;
1034 	struct sock *sk1 = NULL;
1035 #endif
1036 	__be32 label = 0;
1037 	u32 priority = 0;
1038 	struct net *net;
1039 	int oif = 0;
1040 
1041 	if (th->rst)
1042 		return;
1043 
1044 	/* If sk is not NULL, it means we did a successful lookup and the
1045 	 * incoming route had to be correct. prequeue might have dropped our dst.
1046 	 */
1047 	if (!sk && !ipv6_unicast_destination(skb))
1048 		return;
1049 
1050 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1051 #ifdef CONFIG_TCP_MD5SIG
1052 	rcu_read_lock();
1053 	hash_location = tcp_parse_md5sig_option(th);
1054 	if (sk && sk_fullsock(sk)) {
1055 		int l3index;
1056 
1057 		/* sdif set, means packet ingressed via a device
1058 		 * in an L3 domain and inet_iif is set to it.
1059 		 */
1060 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1061 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1062 	} else if (hash_location) {
1063 		int dif = tcp_v6_iif_l3_slave(skb);
1064 		int sdif = tcp_v6_sdif(skb);
1065 		int l3index;
1066 
1067 		/*
1068 		 * The active side is lost. Try to find the listening socket
1069 		 * through the source port, and then find the md5 key through
1070 		 * that listening socket. We do not lose security here:
1071 		 * the incoming packet is checked against the md5 hash of the
1072 		 * found key, and no RST is generated if the hash doesn't match.
1073 		 */
1074 		sk1 = inet6_lookup_listener(net,
1075 					   &tcp_hashinfo, NULL, 0,
1076 					   &ipv6h->saddr,
1077 					   th->source, &ipv6h->daddr,
1078 					   ntohs(th->source), dif, sdif);
1079 		if (!sk1)
1080 			goto out;
1081 
1082 		/* sdif set, means packet ingressed via a device
1083 		 * in an L3 domain and dif is set to it.
1084 		 */
1085 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1086 
1087 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1088 		if (!key)
1089 			goto out;
1090 
1091 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1092 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1093 			goto out;
1094 	}
1095 #endif
1096 
1097 	if (th->ack)
1098 		seq = ntohl(th->ack_seq);
1099 	else
1100 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1101 			  (th->doff << 2);
1102 
1103 	if (sk) {
1104 		oif = sk->sk_bound_dev_if;
1105 		if (sk_fullsock(sk)) {
1106 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1107 
1108 			trace_tcp_send_reset(sk, skb);
1109 			if (np->repflow)
1110 				label = ip6_flowlabel(ipv6h);
1111 			priority = sk->sk_priority;
1112 		}
1113 		if (sk->sk_state == TCP_TIME_WAIT) {
1114 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1115 			priority = inet_twsk(sk)->tw_priority;
1116 		}
1117 	} else {
1118 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1119 			label = ip6_flowlabel(ipv6h);
1120 	}
1121 
1122 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1123 			     ipv6_get_dsfield(ipv6h), label, priority);
1124 
1125 #ifdef CONFIG_TCP_MD5SIG
1126 out:
1127 	rcu_read_unlock();
1128 #endif
1129 }
1130 
1131 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1132 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1133 			    struct tcp_md5sig_key *key, u8 tclass,
1134 			    __be32 label, u32 priority)
1135 {
1136 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1137 			     tclass, label, priority);
1138 }
1139 
1140 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1141 {
1142 	struct inet_timewait_sock *tw = inet_twsk(sk);
1143 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1144 
1145 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1146 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1147 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1148 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1149 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1150 
1151 	inet_twsk_put(tw);
1152 }
1153 
1154 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1155 				  struct request_sock *req)
1156 {
1157 	int l3index;
1158 
1159 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1160 
1161 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1162 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1163 	 */
1164 	/* RFC 7323 2.3
1165 	 * The window field (SEG.WND) of every outgoing segment, with the
1166 	 * exception of <SYN> segments, MUST be right-shifted by
1167 	 * Rcv.Wind.Shift bits:
1168 	 */
1169 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1170 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1171 			tcp_rsk(req)->rcv_nxt,
1172 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1173 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1174 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1175 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1176 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1177 }
1178 
1179 
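/* On a listener under SYN-flood, a bare ACK may complete a connection that
 * was answered with a SYN cookie: cookie_v6_check() validates the cookie
 * encoded in the ACK and reconstructs the request from it.
 */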
1180 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1181 {
1182 #ifdef CONFIG_SYN_COOKIES
1183 	const struct tcphdr *th = tcp_hdr(skb);
1184 
1185 	if (!th->syn)
1186 		sk = cookie_v6_check(sk, skb);
1187 #endif
1188 	return sk;
1189 }
1190 
1191 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1192 			 struct tcphdr *th, u32 *cookie)
1193 {
1194 	u16 mss = 0;
1195 #ifdef CONFIG_SYN_COOKIES
1196 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1197 				    &tcp_request_sock_ipv6_ops, sk, th);
1198 	if (mss) {
1199 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1200 		tcp_synq_overflow(sk);
1201 	}
1202 #endif
1203 	return mss;
1204 }
1205 
1206 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1207 {
1208 	if (skb->protocol == htons(ETH_P_IP))
1209 		return tcp_v4_conn_request(sk, skb);
1210 
1211 	if (!ipv6_unicast_destination(skb))
1212 		goto drop;
1213 
1214 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1215 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1216 		return 0;
1217 	}
1218 
1219 	return tcp_conn_request(&tcp6_request_sock_ops,
1220 				&tcp_request_sock_ipv6_ops, sk, skb);
1221 
1222 drop:
1223 	tcp_listendrop(sk);
1224 	return 0; /* don't send reset */
1225 }
1226 
1227 static void tcp_v6_restore_cb(struct sk_buff *skb)
1228 {
1229 	/* We need to move header back to the beginning if xfrm6_policy_check()
1230 	 * and tcp_v6_fill_cb() are going to be called again.
1231 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1232 	 */
1233 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1234 		sizeof(struct inet6_skb_parm));
1235 }
1236 
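/* Create the child socket once the handshake completes.  The ETH_P_IP
 * branch handles v4-mapped peers: the child is built by the IPv4 code and
 * then re-pointed at the mapped-address af_ops so it still presents an
 * IPv6 socket to userspace.
 */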
1237 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1238 					 struct request_sock *req,
1239 					 struct dst_entry *dst,
1240 					 struct request_sock *req_unhash,
1241 					 bool *own_req)
1242 {
1243 	struct inet_request_sock *ireq;
1244 	struct ipv6_pinfo *newnp;
1245 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1246 	struct ipv6_txoptions *opt;
1247 	struct inet_sock *newinet;
1248 	bool found_dup_sk = false;
1249 	struct tcp_sock *newtp;
1250 	struct sock *newsk;
1251 #ifdef CONFIG_TCP_MD5SIG
1252 	struct tcp_md5sig_key *key;
1253 	int l3index;
1254 #endif
1255 	struct flowi6 fl6;
1256 
1257 	if (skb->protocol == htons(ETH_P_IP)) {
1258 		/*
1259 		 *	v6 mapped
1260 		 */
1261 
1262 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1263 					     req_unhash, own_req);
1264 
1265 		if (!newsk)
1266 			return NULL;
1267 
1268 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1269 
1270 		newinet = inet_sk(newsk);
1271 		newnp = tcp_inet6_sk(newsk);
1272 		newtp = tcp_sk(newsk);
1273 
1274 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1275 
1276 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1277 
1278 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1279 		if (sk_is_mptcp(newsk))
1280 			mptcpv6_handle_mapped(newsk, true);
1281 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1282 #ifdef CONFIG_TCP_MD5SIG
1283 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1284 #endif
1285 
1286 		newnp->ipv6_mc_list = NULL;
1287 		newnp->ipv6_ac_list = NULL;
1288 		newnp->ipv6_fl_list = NULL;
1289 		newnp->pktoptions  = NULL;
1290 		newnp->opt	   = NULL;
1291 		newnp->mcast_oif   = inet_iif(skb);
1292 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1293 		newnp->rcv_flowinfo = 0;
1294 		if (np->repflow)
1295 			newnp->flow_label = 0;
1296 
1297 		/*
1298 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1299 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1300 		 * that function for the gory details. -acme
1301 		 */
1302 
1303 		/* It is tricky place. Until this moment IPv4 tcp
1304 		   worked with IPv6 icsk.icsk_af_ops.
1305 		   Sync it now.
1306 		 */
1307 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1308 
1309 		return newsk;
1310 	}
1311 
1312 	ireq = inet_rsk(req);
1313 
1314 	if (sk_acceptq_is_full(sk))
1315 		goto out_overflow;
1316 
1317 	if (!dst) {
1318 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1319 		if (!dst)
1320 			goto out;
1321 	}
1322 
1323 	newsk = tcp_create_openreq_child(sk, req, skb);
1324 	if (!newsk)
1325 		goto out_nonewsk;
1326 
1327 	/*
1328 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1329 	 * count here, tcp_create_openreq_child now does this for us, see the
1330 	 * comment in that function for the gory details. -acme
1331 	 */
1332 
1333 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1334 	ip6_dst_store(newsk, dst, NULL, NULL);
1335 	inet6_sk_rx_dst_set(newsk, skb);
1336 
1337 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1338 
1339 	newtp = tcp_sk(newsk);
1340 	newinet = inet_sk(newsk);
1341 	newnp = tcp_inet6_sk(newsk);
1342 
1343 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1344 
1345 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1346 	newnp->saddr = ireq->ir_v6_loc_addr;
1347 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1348 	newsk->sk_bound_dev_if = ireq->ir_iif;
1349 
1350 	/* Now IPv6 options...
1351 
1352 	   First: no IPv4 options.
1353 	 */
1354 	newinet->inet_opt = NULL;
1355 	newnp->ipv6_mc_list = NULL;
1356 	newnp->ipv6_ac_list = NULL;
1357 	newnp->ipv6_fl_list = NULL;
1358 
1359 	/* Clone RX bits */
1360 	newnp->rxopt.all = np->rxopt.all;
1361 
1362 	newnp->pktoptions = NULL;
1363 	newnp->opt	  = NULL;
1364 	newnp->mcast_oif  = tcp_v6_iif(skb);
1365 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1366 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1367 	if (np->repflow)
1368 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1369 
1370 	/* Set ToS of the new socket based upon the value of incoming SYN.
1371 	 * ECT bits are set later in tcp_init_transfer().
1372 	 */
1373 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1374 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1375 
1376 	/* Clone native IPv6 options from listening socket (if any)
1377 
1378 	   Yes, keeping a reference count would be much more clever,
1379 	   but we do one more thing here: reattach optmem
1380 	   to newsk.
1381 	 */
1382 	opt = ireq->ipv6_opt;
1383 	if (!opt)
1384 		opt = rcu_dereference(np->opt);
1385 	if (opt) {
1386 		opt = ipv6_dup_options(newsk, opt);
1387 		RCU_INIT_POINTER(newnp->opt, opt);
1388 	}
1389 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1390 	if (opt)
1391 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1392 						    opt->opt_flen;
1393 
1394 	tcp_ca_openreq_child(newsk, dst);
1395 
1396 	tcp_sync_mss(newsk, dst_mtu(dst));
1397 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1398 
1399 	tcp_initialize_rcv_mss(newsk);
1400 
1401 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1402 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1403 
1404 #ifdef CONFIG_TCP_MD5SIG
1405 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1406 
1407 	/* Copy over the MD5 key from the original socket */
1408 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1409 	if (key) {
1410 		/* We're using one, so create a matching key
1411 		 * on the newsk structure. If we fail to get
1412 		 * memory, then we end up not copying the key
1413 		 * across. Shucks.
1414 		 */
1415 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1416 			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1417 			       sk_gfp_mask(sk, GFP_ATOMIC));
1418 	}
1419 #endif
1420 
1421 	if (__inet_inherit_port(sk, newsk) < 0) {
1422 		inet_csk_prepare_forced_close(newsk);
1423 		tcp_done(newsk);
1424 		goto out;
1425 	}
1426 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1427 				       &found_dup_sk);
1428 	if (*own_req) {
1429 		tcp_move_syn(newtp, req);
1430 
1431 		/* Clone pktoptions received with SYN, if we own the req */
1432 		if (ireq->pktopts) {
1433 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1434 			consume_skb(ireq->pktopts);
1435 			ireq->pktopts = NULL;
1436 			if (newnp->pktoptions)
1437 				tcp_v6_restore_cb(newnp->pktoptions);
1438 		}
1439 	} else {
1440 		if (!req_unhash && found_dup_sk) {
1441 			/* This code path should be executed only in the
1442 			 * syncookie case
1443 			 */
1444 			bh_unlock_sock(newsk);
1445 			sock_put(newsk);
1446 			newsk = NULL;
1447 		}
1448 	}
1449 
1450 	return newsk;
1451 
1452 out_overflow:
1453 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1454 out_nonewsk:
1455 	dst_release(dst);
1456 out:
1457 	tcp_listendrop(sk);
1458 	return NULL;
1459 }
1460 
1461 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1462 							   u32));
1463 /* The socket must have its spinlock held when we get
1464  * here, unless it is a TCP_LISTEN socket.
1465  *
1466  * We have a potential double-lock case here, so even when
1467  * doing backlog processing we use the BH locking scheme.
1468  * This is because we cannot sleep with the original spinlock
1469  * held.
1470  */
1471 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1472 {
1473 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1474 	struct sk_buff *opt_skb = NULL;
1475 	struct tcp_sock *tp;
1476 
1477 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1478 	   goes to IPv4 receive handler and is backlogged.
1479 	   From backlog it always goes here. Kerboom...
1480 	   Fortunately, tcp_rcv_established and rcv_established
1481 	   handle them correctly, but it is not the case with
1482 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1483 	 */
1484 
1485 	if (skb->protocol == htons(ETH_P_IP))
1486 		return tcp_v4_do_rcv(sk, skb);
1487 
1488 	/*
1489 	 *	socket locking is here for SMP purposes as backlog rcv
1490 	 *	is currently called with bh processing disabled.
1491 	 */
1492 
1493 	/* Do Stevens' IPV6_PKTOPTIONS.
1494 
1495 	   Yes, guys, it is the only place in our code, where we
1496 	   may make it not affecting IPv4.
1497 	   The rest of code is protocol independent,
1498 	   and I do not like idea to uglify IPv4.
1499 
1500 	   Actually, all the idea behind IPV6_PKTOPTIONS
1501 	   looks not very well thought. For now we latch
1502 	   options, received in the last packet, enqueued
1503 	   by tcp. Feel free to propose better solution.
1504 					       --ANK (980728)
1505 	 */
1506 	if (np->rxopt.all)
1507 		opt_skb = skb_clone_and_charge_r(skb, sk);
1508 
1509 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1510 		struct dst_entry *dst;
1511 
1512 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1513 						lockdep_sock_is_held(sk));
1514 
1515 		sock_rps_save_rxhash(sk, skb);
1516 		sk_mark_napi_id(sk, skb);
1517 		if (dst) {
1518 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1519 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1520 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1521 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1522 				dst_release(dst);
1523 			}
1524 		}
1525 
1526 		tcp_rcv_established(sk, skb);
1527 		if (opt_skb)
1528 			goto ipv6_pktoptions;
1529 		return 0;
1530 	}
1531 
1532 	if (tcp_checksum_complete(skb))
1533 		goto csum_err;
1534 
1535 	if (sk->sk_state == TCP_LISTEN) {
1536 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1537 
1538 		if (!nsk)
1539 			goto discard;
1540 
1541 		if (nsk != sk) {
1542 			if (tcp_child_process(sk, nsk, skb))
1543 				goto reset;
1544 			if (opt_skb)
1545 				__kfree_skb(opt_skb);
1546 			return 0;
1547 		}
1548 	} else
1549 		sock_rps_save_rxhash(sk, skb);
1550 
1551 	if (tcp_rcv_state_process(sk, skb))
1552 		goto reset;
1553 	if (opt_skb)
1554 		goto ipv6_pktoptions;
1555 	return 0;
1556 
1557 reset:
1558 	tcp_v6_send_reset(sk, skb);
1559 discard:
1560 	if (opt_skb)
1561 		__kfree_skb(opt_skb);
1562 	kfree_skb(skb);
1563 	return 0;
1564 csum_err:
1565 	trace_tcp_bad_csum(skb);
1566 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1567 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1568 	goto discard;
1569 
1570 
1571 ipv6_pktoptions:
1572 	/* Do you ask, what is it?
1573 
1574 	   1. skb was enqueued by tcp.
1575 	   2. skb is added to tail of read queue, rather than out of order.
1576 	   3. socket is not in passive state.
1577 	   4. Finally, it really contains options, which user wants to receive.
1578 	 */
1579 	tp = tcp_sk(sk);
1580 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1581 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1582 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1583 			np->mcast_oif = tcp_v6_iif(opt_skb);
1584 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1585 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1586 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1587 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1588 		if (np->repflow)
1589 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1590 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1591 			tcp_v6_restore_cb(opt_skb);
1592 			opt_skb = xchg(&np->pktoptions, opt_skb);
1593 		} else {
1594 			__kfree_skb(opt_skb);
1595 			opt_skb = xchg(&np->pktoptions, NULL);
1596 		}
1597 	}
1598 
1599 	kfree_skb(opt_skb);
1600 	return 0;
1601 }
1602 
1603 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1604 			   const struct tcphdr *th)
1605 {
1606 	/* This is tricky: we move IP6CB at its correct location into
1607 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1608 	 * _decode_session6() uses IP6CB().
1609 	 * barrier() makes sure compiler won't play aliasing games.
1610 	 */
1611 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1612 		sizeof(struct inet6_skb_parm));
1613 	barrier();
1614 
1615 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1616 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1617 				    skb->len - th->doff*4);
1618 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1619 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1620 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1621 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1622 	TCP_SKB_CB(skb)->sacked = 0;
1623 	TCP_SKB_CB(skb)->has_rxtstamp =
1624 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1625 }
1626 
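/* Main receive entry point, registered as the inet6 protocol handler for
 * TCP.  It validates the header and checksum, looks the segment up in the
 * established/listener hash tables, and either processes it directly,
 * backlogs it, or hands it to the request-socket / timewait code.
 */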
1627 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1628 {
1629 	struct sk_buff *skb_to_free;
1630 	int sdif = inet6_sdif(skb);
1631 	int dif = inet6_iif(skb);
1632 	const struct tcphdr *th;
1633 	const struct ipv6hdr *hdr;
1634 	bool refcounted;
1635 	struct sock *sk;
1636 	int ret;
1637 	struct net *net = dev_net(skb->dev);
1638 
1639 	if (skb->pkt_type != PACKET_HOST)
1640 		goto discard_it;
1641 
1642 	/*
1643 	 *	Count it even if it's bad.
1644 	 */
1645 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1646 
1647 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1648 		goto discard_it;
1649 
1650 	th = (const struct tcphdr *)skb->data;
1651 
1652 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1653 		goto bad_packet;
1654 	if (!pskb_may_pull(skb, th->doff*4))
1655 		goto discard_it;
1656 
1657 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1658 		goto csum_error;
1659 
1660 	th = (const struct tcphdr *)skb->data;
1661 	hdr = ipv6_hdr(skb);
1662 
1663 lookup:
1664 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1665 				th->source, th->dest, inet6_iif(skb), sdif,
1666 				&refcounted);
1667 	if (!sk)
1668 		goto no_tcp_socket;
1669 
1670 process:
1671 	if (sk->sk_state == TCP_TIME_WAIT)
1672 		goto do_time_wait;
1673 
1674 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1675 		struct request_sock *req = inet_reqsk(sk);
1676 		bool req_stolen = false;
1677 		struct sock *nsk;
1678 
1679 		sk = req->rsk_listener;
1680 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1681 			sk_drops_add(sk, skb);
1682 			reqsk_put(req);
1683 			goto discard_it;
1684 		}
1685 		if (tcp_checksum_complete(skb)) {
1686 			reqsk_put(req);
1687 			goto csum_error;
1688 		}
1689 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1690 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1691 			if (!nsk) {
1692 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1693 				goto lookup;
1694 			}
1695 			sk = nsk;
1696 			/* reuseport_migrate_sock() has already held one sk_refcnt
1697 			 * before returning.
1698 			 */
1699 		} else {
1700 			sock_hold(sk);
1701 		}
1702 		refcounted = true;
1703 		nsk = NULL;
1704 		if (!tcp_filter(sk, skb)) {
1705 			th = (const struct tcphdr *)skb->data;
1706 			hdr = ipv6_hdr(skb);
1707 			tcp_v6_fill_cb(skb, hdr, th);
1708 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1709 		}
1710 		if (!nsk) {
1711 			reqsk_put(req);
1712 			if (req_stolen) {
1713 				/* Another cpu got exclusive access to req
1714 				 * and created a full blown socket.
1715 				 * Try to feed this packet to this socket
1716 				 * instead of discarding it.
1717 				 */
1718 				tcp_v6_restore_cb(skb);
1719 				sock_put(sk);
1720 				goto lookup;
1721 			}
1722 			goto discard_and_relse;
1723 		}
1724 		if (nsk == sk) {
1725 			reqsk_put(req);
1726 			tcp_v6_restore_cb(skb);
1727 		} else if (tcp_child_process(sk, nsk, skb)) {
1728 			tcp_v6_send_reset(nsk, skb);
1729 			goto discard_and_relse;
1730 		} else {
1731 			sock_put(sk);
1732 			return 0;
1733 		}
1734 	}
1735 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1736 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1737 		goto discard_and_relse;
1738 	}
1739 
1740 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1741 		goto discard_and_relse;
1742 
1743 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1744 		goto discard_and_relse;
1745 
1746 	if (tcp_filter(sk, skb))
1747 		goto discard_and_relse;
1748 	th = (const struct tcphdr *)skb->data;
1749 	hdr = ipv6_hdr(skb);
1750 	tcp_v6_fill_cb(skb, hdr, th);
1751 
1752 	skb->dev = NULL;
1753 
1754 	if (sk->sk_state == TCP_LISTEN) {
1755 		ret = tcp_v6_do_rcv(sk, skb);
1756 		goto put_and_return;
1757 	}
1758 
1759 	sk_incoming_cpu_update(sk);
1760 
1761 	bh_lock_sock_nested(sk);
1762 	tcp_segs_in(tcp_sk(sk), skb);
1763 	ret = 0;
1764 	if (!sock_owned_by_user(sk)) {
1765 		skb_to_free = sk->sk_rx_skb_cache;
1766 		sk->sk_rx_skb_cache = NULL;
1767 		ret = tcp_v6_do_rcv(sk, skb);
1768 	} else {
1769 		if (tcp_add_backlog(sk, skb))
1770 			goto discard_and_relse;
1771 		skb_to_free = NULL;
1772 	}
1773 	bh_unlock_sock(sk);
1774 	if (skb_to_free)
1775 		__kfree_skb(skb_to_free);
1776 put_and_return:
1777 	if (refcounted)
1778 		sock_put(sk);
1779 	return ret ? -1 : 0;
1780 
1781 no_tcp_socket:
1782 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1783 		goto discard_it;
1784 
1785 	tcp_v6_fill_cb(skb, hdr, th);
1786 
1787 	if (tcp_checksum_complete(skb)) {
1788 csum_error:
1789 		trace_tcp_bad_csum(skb);
1790 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1791 bad_packet:
1792 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1793 	} else {
1794 		tcp_v6_send_reset(NULL, skb);
1795 	}
1796 
1797 discard_it:
1798 	kfree_skb(skb);
1799 	return 0;
1800 
1801 discard_and_relse:
1802 	sk_drops_add(sk, skb);
1803 	if (refcounted)
1804 		sock_put(sk);
1805 	goto discard_it;
1806 
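	/*
	 * TIME_WAIT handling: tcp_timewait_state_process() decides whether
	 * the segment should be ACKed, answered with a RST, silently
	 * dropped, or, for an acceptable new SYN, allowed to re-create a
	 * connection through a fresh listener lookup (TCP_TW_SYN).
	 */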
1807 do_time_wait:
1808 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1809 		inet_twsk_put(inet_twsk(sk));
1810 		goto discard_it;
1811 	}
1812 
1813 	tcp_v6_fill_cb(skb, hdr, th);
1814 
1815 	if (tcp_checksum_complete(skb)) {
1816 		inet_twsk_put(inet_twsk(sk));
1817 		goto csum_error;
1818 	}
1819 
1820 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1821 	case TCP_TW_SYN:
1822 	{
1823 		struct sock *sk2;
1824 
1825 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1826 					    skb, __tcp_hdrlen(th),
1827 					    &ipv6_hdr(skb)->saddr, th->source,
1828 					    &ipv6_hdr(skb)->daddr,
1829 					    ntohs(th->dest),
1830 					    tcp_v6_iif_l3_slave(skb),
1831 					    sdif);
1832 		if (sk2) {
1833 			struct inet_timewait_sock *tw = inet_twsk(sk);
1834 			inet_twsk_deschedule_put(tw);
1835 			sk = sk2;
1836 			tcp_v6_restore_cb(skb);
1837 			refcounted = false;
1838 			goto process;
1839 		}
1840 	}
1841 		/* to ACK */
1842 		fallthrough;
1843 	case TCP_TW_ACK:
1844 		tcp_v6_timewait_ack(sk, skb);
1845 		break;
1846 	case TCP_TW_RST:
1847 		tcp_v6_send_reset(sk, skb);
1848 		inet_twsk_deschedule_put(inet_twsk(sk));
1849 		goto discard_it;
1850 	case TCP_TW_SUCCESS:
1851 		;
1852 	}
1853 	goto discard_it;
1854 }
1855 
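/*
 * Early demux: called from the IPv6 receive path before routing.  If an
 * established socket matches, cache it and its dst on the skb so that
 * tcp_v6_rcv() can skip the full lookup and route decision later.
 */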
1856 void tcp_v6_early_demux(struct sk_buff *skb)
1857 {
1858 	const struct ipv6hdr *hdr;
1859 	const struct tcphdr *th;
1860 	struct sock *sk;
1861 
1862 	if (skb->pkt_type != PACKET_HOST)
1863 		return;
1864 
1865 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1866 		return;
1867 
1868 	hdr = ipv6_hdr(skb);
1869 	th = tcp_hdr(skb);
1870 
1871 	if (th->doff < sizeof(struct tcphdr) / 4)
1872 		return;
1873 
1874 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
1875 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1876 					&hdr->saddr, th->source,
1877 					&hdr->daddr, ntohs(th->dest),
1878 					inet6_iif(skb), inet6_sdif(skb));
1879 	if (sk) {
1880 		skb->sk = sk;
1881 		skb->destructor = sock_edemux;
1882 		if (sk_fullsock(sk)) {
1883 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1884 
1885 			if (dst)
1886 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1887 			if (dst &&
1888 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1889 				skb_dst_set_noref(skb, dst);
1890 		}
1891 	}
1892 }
1893 
1894 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1895 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1896 	.twsk_unique	= tcp_twsk_unique,
1897 	.twsk_destructor = tcp_twsk_destructor,
1898 };
1899 
1900 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1901 {
1902 	struct ipv6_pinfo *np = inet6_sk(sk);
1903 
1904 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1905 }
1906 
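/*
 * Address-family specific operations for TCP sockets carrying native
 * IPv6 traffic; the ipv6_mapped variant below is used instead once a
 * socket ends up talking to an IPv4 peer through a v4-mapped address.
 */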
1907 const struct inet_connection_sock_af_ops ipv6_specific = {
1908 	.queue_xmit	   = inet6_csk_xmit,
1909 	.send_check	   = tcp_v6_send_check,
1910 	.rebuild_header	   = inet6_sk_rebuild_header,
1911 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1912 	.conn_request	   = tcp_v6_conn_request,
1913 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1914 	.net_header_len	   = sizeof(struct ipv6hdr),
1915 	.net_frag_header_len = sizeof(struct frag_hdr),
1916 	.setsockopt	   = ipv6_setsockopt,
1917 	.getsockopt	   = ipv6_getsockopt,
1918 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1919 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1920 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1921 };
1922 
1923 #ifdef CONFIG_TCP_MD5SIG
1924 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1925 	.md5_lookup	=	tcp_v6_md5_lookup,
1926 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1927 	.md5_parse	=	tcp_v6_parse_md5_keys,
1928 };
1929 #endif
1930 
1931 /*
1932  *	TCP over IPv4 via INET6 API
1933  */
1934 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1935 	.queue_xmit	   = ip_queue_xmit,
1936 	.send_check	   = tcp_v4_send_check,
1937 	.rebuild_header	   = inet_sk_rebuild_header,
1938 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1939 	.conn_request	   = tcp_v6_conn_request,
1940 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1941 	.net_header_len	   = sizeof(struct iphdr),
1942 	.setsockopt	   = ipv6_setsockopt,
1943 	.getsockopt	   = ipv6_getsockopt,
1944 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1945 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1946 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1947 };
1948 
1949 #ifdef CONFIG_TCP_MD5SIG
1950 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1951 	.md5_lookup	=	tcp_v4_md5_lookup,
1952 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1953 	.md5_parse	=	tcp_v6_parse_md5_keys,
1954 };
1955 #endif
1956 
1957 /* NOTE: A lot of things are set to zero explicitly by the call to
1958  *       sk_alloc(), so they need not be done here.
1959  */
1960 static int tcp_v6_init_sock(struct sock *sk)
1961 {
1962 	struct inet_connection_sock *icsk = inet_csk(sk);
1963 
1964 	tcp_init_sock(sk);
1965 
1966 	icsk->icsk_af_ops = &ipv6_specific;
1967 
1968 #ifdef CONFIG_TCP_MD5SIG
1969 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1970 #endif
1971 
1972 	return 0;
1973 }
1974 
1975 #ifdef CONFIG_PROC_FS
1976 /* Proc filesystem TCPv6 sock list dumping. */
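/*
 * Each line of /proc/net/tcp6 describes one socket.  An illustrative
 * (hand-written, not captured) entry for a listener on port 22 looks
 * roughly like:
 *
 *   sl  local_address                         remote_address ...
 *    0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A ...
 *
 * Addresses are dumped as four 32-bit hex words with the port in hex,
 * and the state column holds the numeric TCP_* state (0A == TCP_LISTEN).
 */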
1977 static void get_openreq6(struct seq_file *seq,
1978 			 const struct request_sock *req, int i)
1979 {
1980 	long ttd = req->rsk_timer.expires - jiffies;
1981 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1982 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1983 
1984 	if (ttd < 0)
1985 		ttd = 0;
1986 
1987 	seq_printf(seq,
1988 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1989 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1990 		   i,
1991 		   src->s6_addr32[0], src->s6_addr32[1],
1992 		   src->s6_addr32[2], src->s6_addr32[3],
1993 		   inet_rsk(req)->ir_num,
1994 		   dest->s6_addr32[0], dest->s6_addr32[1],
1995 		   dest->s6_addr32[2], dest->s6_addr32[3],
1996 		   ntohs(inet_rsk(req)->ir_rmt_port),
1997 		   TCP_SYN_RECV,
1998 		   0, 0, /* could print option size, but that is AF-dependent. */
1999 		   1,   /* timers active (only the expire timer) */
2000 		   jiffies_to_clock_t(ttd),
2001 		   req->num_timeout,
2002 		   from_kuid_munged(seq_user_ns(seq),
2003 				    sock_i_uid(req->rsk_listener)),
2004 		   0,  /* non standard timer */
2005 		   0, /* open_requests have no inode */
2006 		   0, req);
2007 }
2008 
2009 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2010 {
2011 	const struct in6_addr *dest, *src;
2012 	__u16 destp, srcp;
2013 	int timer_active;
2014 	unsigned long timer_expires;
2015 	const struct inet_sock *inet = inet_sk(sp);
2016 	const struct tcp_sock *tp = tcp_sk(sp);
2017 	const struct inet_connection_sock *icsk = inet_csk(sp);
2018 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2019 	int rx_queue;
2020 	int state;
2021 
2022 	dest  = &sp->sk_v6_daddr;
2023 	src   = &sp->sk_v6_rcv_saddr;
2024 	destp = ntohs(inet->inet_dport);
2025 	srcp  = ntohs(inet->inet_sport);
2026 
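	/*
	 * timer_active mirrors the IPv4 /proc/net/tcp convention:
	 * 1 retransmit/RACK/loss-probe timer, 4 zero-window probe timer,
	 * 2 the socket's sk_timer (keepalive), 0 none; time-wait sockets
	 * report 3 (see get_timewait6_sock()).
	 */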
2027 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2028 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2029 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2030 		timer_active	= 1;
2031 		timer_expires	= icsk->icsk_timeout;
2032 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2033 		timer_active	= 4;
2034 		timer_expires	= icsk->icsk_timeout;
2035 	} else if (timer_pending(&sp->sk_timer)) {
2036 		timer_active	= 2;
2037 		timer_expires	= sp->sk_timer.expires;
2038 	} else {
2039 		timer_active	= 0;
2040 		timer_expires = jiffies;
2041 	}
2042 
2043 	state = inet_sk_state_load(sp);
2044 	if (state == TCP_LISTEN)
2045 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2046 	else
2047 		/* Because we don't lock the socket,
2048 		 * we might find a transient negative value.
2049 		 */
2050 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2051 				      READ_ONCE(tp->copied_seq), 0);
2052 
2053 	seq_printf(seq,
2054 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2055 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2056 		   i,
2057 		   src->s6_addr32[0], src->s6_addr32[1],
2058 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2059 		   dest->s6_addr32[0], dest->s6_addr32[1],
2060 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2061 		   state,
2062 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2063 		   rx_queue,
2064 		   timer_active,
2065 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2066 		   icsk->icsk_retransmits,
2067 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2068 		   icsk->icsk_probes_out,
2069 		   sock_i_ino(sp),
2070 		   refcount_read(&sp->sk_refcnt), sp,
2071 		   jiffies_to_clock_t(icsk->icsk_rto),
2072 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2073 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2074 		   tcp_snd_cwnd(tp),
2075 		   state == TCP_LISTEN ?
2076 			fastopenq->max_qlen :
2077 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2078 		   );
2079 }
2080 
2081 static void get_timewait6_sock(struct seq_file *seq,
2082 			       struct inet_timewait_sock *tw, int i)
2083 {
2084 	long delta = tw->tw_timer.expires - jiffies;
2085 	const struct in6_addr *dest, *src;
2086 	__u16 destp, srcp;
2087 
2088 	dest = &tw->tw_v6_daddr;
2089 	src  = &tw->tw_v6_rcv_saddr;
2090 	destp = ntohs(tw->tw_dport);
2091 	srcp  = ntohs(tw->tw_sport);
2092 
2093 	seq_printf(seq,
2094 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2096 		   i,
2097 		   src->s6_addr32[0], src->s6_addr32[1],
2098 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2099 		   dest->s6_addr32[0], dest->s6_addr32[1],
2100 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2101 		   tw->tw_substate, 0, 0,
2102 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2103 		   refcount_read(&tw->tw_refcnt), tw);
2104 }
2105 
2106 static int tcp6_seq_show(struct seq_file *seq, void *v)
2107 {
2108 	struct tcp_iter_state *st;
2109 	struct sock *sk = v;
2110 
2111 	if (v == SEQ_START_TOKEN) {
2112 		seq_puts(seq,
2113 			 "  sl  "
2114 			 "local_address                         "
2115 			 "remote_address                        "
2116 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2117 			 "   uid  timeout inode\n");
2118 		goto out;
2119 	}
2120 	st = seq->private;
2121 
2122 	if (sk->sk_state == TCP_TIME_WAIT)
2123 		get_timewait6_sock(seq, v, st->num);
2124 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2125 		get_openreq6(seq, v, st->num);
2126 	else
2127 		get_tcp6_sock(seq, v, st->num);
2128 out:
2129 	return 0;
2130 }
2131 
2132 static const struct seq_operations tcp6_seq_ops = {
2133 	.show		= tcp6_seq_show,
2134 	.start		= tcp_seq_start,
2135 	.next		= tcp_seq_next,
2136 	.stop		= tcp_seq_stop,
2137 };
2138 
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2140 	.family		= AF_INET6,
2141 };
2142 
2143 int __net_init tcp6_proc_init(struct net *net)
2144 {
2145 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2146 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2147 		return -ENOMEM;
2148 	return 0;
2149 }
2150 
2151 void tcp6_proc_exit(struct net *net)
2152 {
2153 	remove_proc_entry("tcp6", net->proc_net);
2154 }
2155 #endif
2156 
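/*
 * The TCPv6 proto.  Most operations are shared with IPv4 TCP; only the
 * connect/receive/init paths differ (tcp_v6_*), and obj_size accounts
 * for the larger struct tcp6_sock.
 */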
2157 struct proto tcpv6_prot = {
2158 	.name			= "TCPv6",
2159 	.owner			= THIS_MODULE,
2160 	.close			= tcp_close,
2161 	.pre_connect		= tcp_v6_pre_connect,
2162 	.connect		= tcp_v6_connect,
2163 	.disconnect		= tcp_disconnect,
2164 	.accept			= inet_csk_accept,
2165 	.ioctl			= tcp_ioctl,
2166 	.init			= tcp_v6_init_sock,
2167 	.destroy		= tcp_v4_destroy_sock,
2168 	.shutdown		= tcp_shutdown,
2169 	.setsockopt		= tcp_setsockopt,
2170 	.getsockopt		= tcp_getsockopt,
2171 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2172 	.keepalive		= tcp_set_keepalive,
2173 	.recvmsg		= tcp_recvmsg,
2174 	.sendmsg		= tcp_sendmsg,
2175 	.sendpage		= tcp_sendpage,
2176 	.backlog_rcv		= tcp_v6_do_rcv,
2177 	.release_cb		= tcp_release_cb,
2178 	.hash			= inet6_hash,
2179 	.unhash			= inet_unhash,
2180 	.get_port		= inet_csk_get_port,
2181 #ifdef CONFIG_BPF_SYSCALL
2182 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2183 #endif
2184 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2185 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2186 	.stream_memory_free	= tcp_stream_memory_free,
2187 	.sockets_allocated	= &tcp_sockets_allocated,
2188 	.memory_allocated	= &tcp_memory_allocated,
2189 	.memory_pressure	= &tcp_memory_pressure,
2190 	.orphan_count		= &tcp_orphan_count,
2191 	.sysctl_mem		= sysctl_tcp_mem,
2192 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2193 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2194 	.max_header		= MAX_TCP_HEADER,
2195 	.obj_size		= sizeof(struct tcp6_sock),
2196 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2197 	.twsk_prot		= &tcp6_timewait_sock_ops,
2198 	.rsk_prot		= &tcp6_request_sock_ops,
2199 	.h.hashinfo		= &tcp_hashinfo,
2200 	.no_autobind		= true,
2201 	.diag_destroy		= tcp_abort,
2202 };
2203 EXPORT_SYMBOL_GPL(tcpv6_prot);
2204 
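/*
 * Registration glue: tcpv6_protocol plugs tcp_v6_rcv()/tcp_v6_err()
 * into the IPv6 protocol dispatch table, and tcpv6_protosw exposes
 * SOCK_STREAM/IPPROTO_TCP sockets through the inet6 socket layer.
 */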
2205 static const struct inet6_protocol tcpv6_protocol = {
2206 	.handler	=	tcp_v6_rcv,
2207 	.err_handler	=	tcp_v6_err,
2208 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2209 };
2210 
2211 static struct inet_protosw tcpv6_protosw = {
2212 	.type		=	SOCK_STREAM,
2213 	.protocol	=	IPPROTO_TCP,
2214 	.prot		=	&tcpv6_prot,
2215 	.ops		=	&inet6_stream_ops,
2216 	.flags		=	INET_PROTOSW_PERMANENT |
2217 				INET_PROTOSW_ICSK,
2218 };
2219 
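/*
 * Each network namespace gets its own kernel control socket
 * (net->ipv6.tcp_sk), used to send RSTs and ACKs that are not
 * associated with a full socket (see tcp_v6_send_response()).
 */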
2220 static int __net_init tcpv6_net_init(struct net *net)
2221 {
2222 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2223 				    SOCK_RAW, IPPROTO_TCP, net);
2224 }
2225 
2226 static void __net_exit tcpv6_net_exit(struct net *net)
2227 {
2228 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2229 }
2230 
2231 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2232 {
2233 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2234 }
2235 
2236 static struct pernet_operations tcpv6_net_ops = {
2237 	.init	    = tcpv6_net_init,
2238 	.exit	    = tcpv6_net_exit,
2239 	.exit_batch = tcpv6_net_exit_batch,
2240 };
2241 
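/*
 * Boot-time initialisation: register the IPPROTO_TCP handler with the
 * IPv6 stack, hook up the SOCK_STREAM protosw, create the per-netns
 * control sockets and initialise MPTCP's IPv6 side.  On error, steps
 * already completed are unwound in reverse order.
 */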
2242 int __init tcpv6_init(void)
2243 {
2244 	int ret;
2245 
2246 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2247 	if (ret)
2248 		goto out;
2249 
2250 	/* register inet6 protocol */
2251 	ret = inet6_register_protosw(&tcpv6_protosw);
2252 	if (ret)
2253 		goto out_tcpv6_protocol;
2254 
2255 	ret = register_pernet_subsys(&tcpv6_net_ops);
2256 	if (ret)
2257 		goto out_tcpv6_protosw;
2258 
2259 	ret = mptcpv6_init();
2260 	if (ret)
2261 		goto out_tcpv6_pernet_subsys;
2262 
2263 out:
2264 	return ret;
2265 
2266 out_tcpv6_pernet_subsys:
2267 	unregister_pernet_subsys(&tcpv6_net_ops);
2268 out_tcpv6_protosw:
2269 	inet6_unregister_protosw(&tcpv6_protosw);
2270 out_tcpv6_protocol:
2271 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2272 	goto out;
2273 }
2274 
2275 void tcpv6_exit(void)
2276 {
2277 	unregister_pernet_subsys(&tcpv6_net_ops);
2278 	inet6_unregister_protosw(&tcpv6_protosw);
2279 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2280 }
2281