1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
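/* Cache the validated dst, its ifindex and route cookie on the socket so
 * the established-state receive path can reuse them without a new lookup.
 */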
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
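/* Derive the initial sequence number for a connection from the packet's
 * address and port 4-tuple.
 */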
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
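/* Per-connection timestamp offset, derived from the address pair. */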
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
273 	fl6.flowi6_oif = sk->sk_bound_dev_if;
274 	fl6.flowi6_mark = sk->sk_mark;
275 	fl6.fl6_dport = usin->sin6_port;
276 	fl6.fl6_sport = inet->inet_sport;
277 	fl6.flowi6_uid = sk->sk_uid;
278 
279 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280 	final_p = fl6_update_dst(&fl6, opt, &final);
281 
282 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283 
284 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
285 	if (IS_ERR(dst)) {
286 		err = PTR_ERR(dst);
287 		goto failure;
288 	}
289 
290 	if (!saddr) {
291 		saddr = &fl6.saddr;
292 		sk->sk_v6_rcv_saddr = *saddr;
293 	}
294 
295 	/* set the source address */
296 	np->saddr = *saddr;
297 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298 
299 	sk->sk_gso_type = SKB_GSO_TCPV6;
300 	ip6_dst_store(sk, dst, NULL, NULL);
301 
302 	icsk->icsk_ext_hdr_len = 0;
303 	if (opt)
304 		icsk->icsk_ext_hdr_len = opt->opt_flen +
305 					 opt->opt_nflen;
306 
307 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308 
309 	inet->inet_dport = usin->sin6_port;
310 
311 	tcp_set_state(sk, TCP_SYN_SENT);
312 	err = inet6_hash_connect(tcp_death_row, sk);
313 	if (err)
314 		goto late_failure;
315 
316 	sk_set_txhash(sk);
317 
318 	if (likely(!tp->repair)) {
319 		if (!tp->write_seq)
320 			WRITE_ONCE(tp->write_seq,
321 				   secure_tcpv6_seq(np->saddr.s6_addr32,
322 						    sk->sk_v6_daddr.s6_addr32,
323 						    inet->inet_sport,
324 						    inet->inet_dport));
325 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 						   np->saddr.s6_addr32,
327 						   sk->sk_v6_daddr.s6_addr32);
328 	}
329 
330 	if (tcp_fastopen_defer_connect(sk, &err))
331 		return err;
332 	if (err)
333 		goto late_failure;
334 
335 	err = tcp_connect(sk);
336 	if (err)
337 		goto late_failure;
338 
339 	return 0;
340 
341 late_failure:
342 	tcp_set_state(sk, TCP_CLOSE);
343 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
344 		inet_reset_saddr(sk);
345 failure:
346 	inet->inet_dport = 0;
347 	sk->sk_route_caps = 0;
348 	return err;
349 }
350 
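/* Apply an MTU reduction recorded in tp->mtu_info by tcp_v6_err(), either
 * called directly or deferred via the TCP_MTU_REDUCED_DEFERRED flag.
 */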
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 	struct dst_entry *dst;
354 	u32 mtu;
355 
356 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 		return;
358 
359 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360 
361 	/* Drop requests trying to increase our current mss.
362 	 * Check done in __ip6_rt_update_pmtu() is too late.
363 	 */
364 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 		return;
366 
367 	dst = inet6_csk_update_pmtu(sk, mtu);
368 	if (!dst)
369 		return;
370 
371 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372 		tcp_sync_mss(sk, dst_mtu(dst));
373 		tcp_simple_retransmit(sk);
374 	}
375 }
376 
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 		u8 type, u8 code, int offset, __be32 info)
379 {
380 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382 	struct net *net = dev_net(skb->dev);
383 	struct request_sock *fastopen;
384 	struct ipv6_pinfo *np;
385 	struct tcp_sock *tp;
386 	__u32 seq, snd_una;
387 	struct sock *sk;
388 	bool fatal;
389 	int err;
390 
391 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
392 					&hdr->daddr, th->dest,
393 					&hdr->saddr, ntohs(th->source),
394 					skb->dev->ifindex, inet6_sdif(skb));
395 
396 	if (!sk) {
397 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 				  ICMP6_MIB_INERRORS);
399 		return -ENOENT;
400 	}
401 
402 	if (sk->sk_state == TCP_TIME_WAIT) {
403 		inet_twsk_put(inet_twsk(sk));
404 		return 0;
405 	}
406 	seq = ntohl(th->seq);
407 	fatal = icmpv6_err_convert(type, code, &err);
408 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
409 		tcp_req_err(sk, seq, fatal);
410 		return 0;
411 	}
412 
413 	bh_lock_sock(sk);
414 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416 
417 	if (sk->sk_state == TCP_CLOSE)
418 		goto out;
419 
420 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
421 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422 		goto out;
423 	}
424 
425 	tp = tcp_sk(sk);
426 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
427 	fastopen = rcu_dereference(tp->fastopen_rsk);
428 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
429 	if (sk->sk_state != TCP_LISTEN &&
430 	    !between(seq, snd_una, tp->snd_nxt)) {
431 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432 		goto out;
433 	}
434 
435 	np = tcp_inet6_sk(sk);
436 
437 	if (type == NDISC_REDIRECT) {
438 		if (!sock_owned_by_user(sk)) {
439 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
440 
441 			if (dst)
442 				dst->ops->redirect(dst, sk, skb);
443 		}
444 		goto out;
445 	}
446 
447 	if (type == ICMPV6_PKT_TOOBIG) {
448 		u32 mtu = ntohl(info);
449 
450 		/* We are not interested in TCP_LISTEN and open_requests
451 		 * (SYN-ACKs sent out by Linux are always <576 bytes so
452 		 * they should go through unfragmented).
453 		 */
454 		if (sk->sk_state == TCP_LISTEN)
455 			goto out;
456 
457 		if (!ip6_sk_accept_pmtu(sk))
458 			goto out;
459 
460 		if (mtu < IPV6_MIN_MTU)
461 			goto out;
462 
463 		WRITE_ONCE(tp->mtu_info, mtu);
464 
465 		if (!sock_owned_by_user(sk))
466 			tcp_v6_mtu_reduced(sk);
467 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
468 					   &sk->sk_tsq_flags))
469 			sock_hold(sk);
470 		goto out;
471 	}
472 
473 
474 	/* Might be for a request_sock */
475 	switch (sk->sk_state) {
476 	case TCP_SYN_SENT:
477 	case TCP_SYN_RECV:
478 		/* Only in fast or simultaneous open. If a fast open socket is
479 		 * already accepted it is treated as a connected one below.
480 		 */
481 		if (fastopen && !fastopen->sk)
482 			break;
483 
484 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
485 
486 		if (!sock_owned_by_user(sk)) {
487 			sk->sk_err = err;
488 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
489 
490 			tcp_done(sk);
491 		} else
492 			sk->sk_err_soft = err;
493 		goto out;
494 	case TCP_LISTEN:
495 		break;
496 	default:
497 		/* check if this ICMP message allows revert of backoff.
498 		 * (see RFC 6069)
499 		 */
500 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501 		    code == ICMPV6_NOROUTE)
502 			tcp_ld_RTO_revert(sk, seq);
503 	}
504 
505 	if (!sock_owned_by_user(sk) && np->recverr) {
506 		sk->sk_err = err;
507 		sk->sk_error_report(sk);
508 	} else
509 		sk->sk_err_soft = err;
510 
511 out:
512 	bh_unlock_sock(sk);
513 	sock_put(sk);
514 	return 0;
515 }
516 
517 
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519 			      struct flowi *fl,
520 			      struct request_sock *req,
521 			      struct tcp_fastopen_cookie *foc,
522 			      enum tcp_synack_type synack_type,
523 			      struct sk_buff *syn_skb)
524 {
525 	struct inet_request_sock *ireq = inet_rsk(req);
526 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527 	struct ipv6_txoptions *opt;
528 	struct flowi6 *fl6 = &fl->u.ip6;
529 	struct sk_buff *skb;
530 	int err = -ENOMEM;
531 	u8 tclass;
532 
533 	/* First, grab a route. */
534 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535 					       IPPROTO_TCP)) == NULL)
536 		goto done;
537 
538 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539 
540 	if (skb) {
541 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542 				    &ireq->ir_v6_rmt_addr);
543 
544 		fl6->daddr = ireq->ir_v6_rmt_addr;
545 		if (np->repflow && ireq->pktopts)
546 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547 
548 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550 				(np->tclass & INET_ECN_MASK) :
551 				np->tclass;
552 
553 		if (!INET_ECN_is_capable(tclass) &&
554 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
555 			tclass |= INET_ECN_ECT_0;
556 
557 		rcu_read_lock();
558 		opt = ireq->ipv6_opt;
559 		if (!opt)
560 			opt = rcu_dereference(np->opt);
561 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
562 			       tclass, sk->sk_priority);
563 		rcu_read_unlock();
564 		err = net_xmit_eval(err);
565 	}
566 
567 done:
568 	return err;
569 }
570 
571 
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574 	kfree(inet_rsk(req)->ipv6_opt);
575 	kfree_skb(inet_rsk(req)->pktopts);
576 }
577 
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580 						   const struct in6_addr *addr,
581 						   int l3index)
582 {
583 	return tcp_md5_do_lookup(sk, l3index,
584 				 (union tcp_md5_addr *)addr, AF_INET6);
585 }
586 
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588 						const struct sock *addr_sk)
589 {
590 	int l3index;
591 
592 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593 						 addr_sk->sk_bound_dev_if);
594 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595 				    l3index);
596 }
597 
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599 				 sockptr_t optval, int optlen)
600 {
601 	struct tcp_md5sig cmd;
602 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 	int l3index = 0;
604 	u8 prefixlen;
605 
606 	if (optlen < sizeof(cmd))
607 		return -EINVAL;
608 
609 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
610 		return -EFAULT;
611 
612 	if (sin6->sin6_family != AF_INET6)
613 		return -EINVAL;
614 
615 	if (optname == TCP_MD5SIG_EXT &&
616 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
617 		prefixlen = cmd.tcpm_prefixlen;
618 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
619 					prefixlen > 32))
620 			return -EINVAL;
621 	} else {
622 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
623 	}
624 
625 	if (optname == TCP_MD5SIG_EXT &&
626 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
627 		struct net_device *dev;
628 
629 		rcu_read_lock();
630 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
631 		if (dev && netif_is_l3_master(dev))
632 			l3index = dev->ifindex;
633 		rcu_read_unlock();
634 
635 		/* ok to reference set/not set outside of rcu;
636 		 * right now device MUST be an L3 master
637 		 */
638 		if (!dev || !l3index)
639 			return -EINVAL;
640 	}
641 
642 	if (!cmd.tcpm_keylen) {
643 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
644 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
645 					      AF_INET, prefixlen,
646 					      l3index);
647 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
648 				      AF_INET6, prefixlen, l3index);
649 	}
650 
651 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
652 		return -EINVAL;
653 
654 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
655 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
656 				      AF_INET, prefixlen, l3index,
657 				      cmd.tcpm_key, cmd.tcpm_keylen,
658 				      GFP_KERNEL);
659 
660 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661 			      AF_INET6, prefixlen, l3index,
662 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
663 }
664 
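/* Feed the IPv6 pseudo-header and the TCP header (with its checksum field
 * zeroed) into the MD5 hash request.
 */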
665 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
666 				   const struct in6_addr *daddr,
667 				   const struct in6_addr *saddr,
668 				   const struct tcphdr *th, int nbytes)
669 {
670 	struct tcp6_pseudohdr *bp;
671 	struct scatterlist sg;
672 	struct tcphdr *_th;
673 
674 	bp = hp->scratch;
675 	/* 1. TCP pseudo-header (RFC2460) */
676 	bp->saddr = *saddr;
677 	bp->daddr = *daddr;
678 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
679 	bp->len = cpu_to_be32(nbytes);
680 
681 	_th = (struct tcphdr *)(bp + 1);
682 	memcpy(_th, th, sizeof(*th));
683 	_th->check = 0;
684 
685 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
686 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
687 				sizeof(*bp) + sizeof(*th));
688 	return crypto_ahash_update(hp->md5_req);
689 }
690 
691 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
692 			       const struct in6_addr *daddr, struct in6_addr *saddr,
693 			       const struct tcphdr *th)
694 {
695 	struct tcp_md5sig_pool *hp;
696 	struct ahash_request *req;
697 
698 	hp = tcp_get_md5sig_pool();
699 	if (!hp)
700 		goto clear_hash_noput;
701 	req = hp->md5_req;
702 
703 	if (crypto_ahash_init(req))
704 		goto clear_hash;
705 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
706 		goto clear_hash;
707 	if (tcp_md5_hash_key(hp, key))
708 		goto clear_hash;
709 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
710 	if (crypto_ahash_final(req))
711 		goto clear_hash;
712 
713 	tcp_put_md5sig_pool();
714 	return 0;
715 
716 clear_hash:
717 	tcp_put_md5sig_pool();
718 clear_hash_noput:
719 	memset(md5_hash, 0, 16);
720 	return 1;
721 }
722 
723 static int tcp_v6_md5_hash_skb(char *md5_hash,
724 			       const struct tcp_md5sig_key *key,
725 			       const struct sock *sk,
726 			       const struct sk_buff *skb)
727 {
728 	const struct in6_addr *saddr, *daddr;
729 	struct tcp_md5sig_pool *hp;
730 	struct ahash_request *req;
731 	const struct tcphdr *th = tcp_hdr(skb);
732 
733 	if (sk) { /* valid for establish/request sockets */
734 		saddr = &sk->sk_v6_rcv_saddr;
735 		daddr = &sk->sk_v6_daddr;
736 	} else {
737 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
738 		saddr = &ip6h->saddr;
739 		daddr = &ip6h->daddr;
740 	}
741 
742 	hp = tcp_get_md5sig_pool();
743 	if (!hp)
744 		goto clear_hash_noput;
745 	req = hp->md5_req;
746 
747 	if (crypto_ahash_init(req))
748 		goto clear_hash;
749 
750 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
751 		goto clear_hash;
752 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
753 		goto clear_hash;
754 	if (tcp_md5_hash_key(hp, key))
755 		goto clear_hash;
756 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
757 	if (crypto_ahash_final(req))
758 		goto clear_hash;
759 
760 	tcp_put_md5sig_pool();
761 	return 0;
762 
763 clear_hash:
764 	tcp_put_md5sig_pool();
765 clear_hash_noput:
766 	memset(md5_hash, 0, 16);
767 	return 1;
768 }
769 
770 #endif
771 
772 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
773 				    const struct sk_buff *skb,
774 				    int dif, int sdif)
775 {
776 #ifdef CONFIG_TCP_MD5SIG
777 	const __u8 *hash_location = NULL;
778 	struct tcp_md5sig_key *hash_expected;
779 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
780 	const struct tcphdr *th = tcp_hdr(skb);
781 	int genhash, l3index;
782 	u8 newhash[16];
783 
784 	/* sdif set, means packet ingressed via a device
785 	 * in an L3 domain and dif is set to the l3mdev
786 	 */
787 	l3index = sdif ? dif : 0;
788 
789 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
790 	hash_location = tcp_parse_md5sig_option(th);
791 
792 	/* We've parsed the options - do we have a hash? */
793 	if (!hash_expected && !hash_location)
794 		return false;
795 
796 	if (hash_expected && !hash_location) {
797 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798 		return true;
799 	}
800 
801 	if (!hash_expected && hash_location) {
802 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803 		return true;
804 	}
805 
806 	/* check the signature */
807 	genhash = tcp_v6_md5_hash_skb(newhash,
808 				      hash_expected,
809 				      NULL, skb);
810 
811 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
812 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
813 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
814 				     genhash ? "failed" : "mismatch",
815 				     &ip6h->saddr, ntohs(th->source),
816 				     &ip6h->daddr, ntohs(th->dest), l3index);
817 		return true;
818 	}
819 #endif
820 	return false;
821 }
822 
823 static void tcp_v6_init_req(struct request_sock *req,
824 			    const struct sock *sk_listener,
825 			    struct sk_buff *skb)
826 {
827 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
828 	struct inet_request_sock *ireq = inet_rsk(req);
829 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
830 
831 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
832 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
833 
834 	/* So that link locals have meaning */
835 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
836 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
837 		ireq->ir_iif = tcp_v6_iif(skb);
838 
839 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
840 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
841 	     np->rxopt.bits.rxinfo ||
842 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
843 	     np->rxopt.bits.rxohlim || np->repflow)) {
844 		refcount_inc(&skb->users);
845 		ireq->pktopts = skb;
846 	}
847 }
848 
849 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
850 					  struct flowi *fl,
851 					  const struct request_sock *req)
852 {
853 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
854 }
855 
856 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
857 	.family		=	AF_INET6,
858 	.obj_size	=	sizeof(struct tcp6_request_sock),
859 	.rtx_syn_ack	=	tcp_rtx_synack,
860 	.send_ack	=	tcp_v6_reqsk_send_ack,
861 	.destructor	=	tcp_v6_reqsk_destructor,
862 	.send_reset	=	tcp_v6_send_reset,
863 	.syn_ack_timeout =	tcp_syn_ack_timeout,
864 };
865 
866 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
867 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
868 				sizeof(struct ipv6hdr),
869 #ifdef CONFIG_TCP_MD5SIG
870 	.req_md5_lookup	=	tcp_v6_md5_lookup,
871 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
872 #endif
873 	.init_req	=	tcp_v6_init_req,
874 #ifdef CONFIG_SYN_COOKIES
875 	.cookie_init_seq =	cookie_v6_init_sequence,
876 #endif
877 	.route_req	=	tcp_v6_route_req,
878 	.init_seq	=	tcp_v6_init_seq,
879 	.init_ts_off	=	tcp_v6_init_ts_off,
880 	.send_synack	=	tcp_v6_send_synack,
881 };
882 
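/* Build and send a stand-alone control segment (RST or bare ACK) in reply
 * to @skb. @sk may be NULL, a full socket, a timewait or a request socket;
 * used by tcp_v6_send_reset() and tcp_v6_send_ack().
 */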
883 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
884 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
885 				 int oif, struct tcp_md5sig_key *key, int rst,
886 				 u8 tclass, __be32 label, u32 priority)
887 {
888 	const struct tcphdr *th = tcp_hdr(skb);
889 	struct tcphdr *t1;
890 	struct sk_buff *buff;
891 	struct flowi6 fl6;
892 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
893 	struct sock *ctl_sk = net->ipv6.tcp_sk;
894 	unsigned int tot_len = sizeof(struct tcphdr);
895 	struct dst_entry *dst;
896 	__be32 *topt;
897 	__u32 mark = 0;
898 
899 	if (tsecr)
900 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
901 #ifdef CONFIG_TCP_MD5SIG
902 	if (key)
903 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
904 #endif
905 
906 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
907 			 GFP_ATOMIC);
908 	if (!buff)
909 		return;
910 
911 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
912 
913 	t1 = skb_push(buff, tot_len);
914 	skb_reset_transport_header(buff);
915 
916 	/* Swap the send and the receive. */
917 	memset(t1, 0, sizeof(*t1));
918 	t1->dest = th->source;
919 	t1->source = th->dest;
920 	t1->doff = tot_len / 4;
921 	t1->seq = htonl(seq);
922 	t1->ack_seq = htonl(ack);
923 	t1->ack = !rst || !th->ack;
924 	t1->rst = rst;
925 	t1->window = htons(win);
926 
927 	topt = (__be32 *)(t1 + 1);
928 
929 	if (tsecr) {
930 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
931 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
932 		*topt++ = htonl(tsval);
933 		*topt++ = htonl(tsecr);
934 	}
935 
936 #ifdef CONFIG_TCP_MD5SIG
937 	if (key) {
938 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
939 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
940 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
941 				    &ipv6_hdr(skb)->saddr,
942 				    &ipv6_hdr(skb)->daddr, t1);
943 	}
944 #endif
945 
946 	memset(&fl6, 0, sizeof(fl6));
947 	fl6.daddr = ipv6_hdr(skb)->saddr;
948 	fl6.saddr = ipv6_hdr(skb)->daddr;
949 	fl6.flowlabel = label;
950 
951 	buff->ip_summed = CHECKSUM_PARTIAL;
952 	buff->csum = 0;
953 
954 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
955 
956 	fl6.flowi6_proto = IPPROTO_TCP;
957 	if (rt6_need_strict(&fl6.daddr) && !oif)
958 		fl6.flowi6_oif = tcp_v6_iif(skb);
959 	else {
960 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
961 			oif = skb->skb_iif;
962 
963 		fl6.flowi6_oif = oif;
964 	}
965 
966 	if (sk) {
967 		if (sk->sk_state == TCP_TIME_WAIT) {
968 			mark = inet_twsk(sk)->tw_mark;
969 			/* autoflowlabel relies on buff->hash */
970 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
971 				     PKT_HASH_TYPE_L4);
972 		} else {
973 			mark = sk->sk_mark;
974 		}
975 		buff->tstamp = tcp_transmit_time(sk);
976 	}
977 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
978 	fl6.fl6_dport = t1->dest;
979 	fl6.fl6_sport = t1->source;
980 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
981 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
982 
983 	/* Pass a socket to ip6_dst_lookup even if it is for an RST;
984 	 * the underlying function will use it to retrieve the network
985 	 * namespace.
986 	 */
987 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
988 	if (!IS_ERR(dst)) {
989 		skb_dst_set(buff, dst);
990 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
991 			 tclass & ~INET_ECN_MASK, priority);
992 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
993 		if (rst)
994 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
995 		return;
996 	}
997 
998 	kfree_skb(buff);
999 }
1000 
1001 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1002 {
1003 	const struct tcphdr *th = tcp_hdr(skb);
1004 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1005 	u32 seq = 0, ack_seq = 0;
1006 	struct tcp_md5sig_key *key = NULL;
1007 #ifdef CONFIG_TCP_MD5SIG
1008 	const __u8 *hash_location = NULL;
1009 	unsigned char newhash[16];
1010 	int genhash;
1011 	struct sock *sk1 = NULL;
1012 #endif
1013 	__be32 label = 0;
1014 	u32 priority = 0;
1015 	struct net *net;
1016 	int oif = 0;
1017 
1018 	if (th->rst)
1019 		return;
1020 
1021 	/* If sk not NULL, it means we did a successful lookup and incoming
1022 	 * route had to be correct. prequeue might have dropped our dst.
1023 	 */
1024 	if (!sk && !ipv6_unicast_destination(skb))
1025 		return;
1026 
1027 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1028 #ifdef CONFIG_TCP_MD5SIG
1029 	rcu_read_lock();
1030 	hash_location = tcp_parse_md5sig_option(th);
1031 	if (sk && sk_fullsock(sk)) {
1032 		int l3index;
1033 
1034 		/* sdif set, means packet ingressed via a device
1035 		 * in an L3 domain and inet_iif is set to it.
1036 		 */
1037 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1038 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1039 	} else if (hash_location) {
1040 		int dif = tcp_v6_iif_l3_slave(skb);
1041 		int sdif = tcp_v6_sdif(skb);
1042 		int l3index;
1043 
1044 		/*
1045 		 * The active side is lost. Try to find the listening socket through
1046 		 * the source port, and then find the md5 key through the listening socket.
1047 		 * We do not loosen security here:
1048 		 * the incoming packet is checked with the md5 hash of the found key;
1049 		 * no RST is generated if the md5 hash doesn't match.
1050 		 */
1051 		sk1 = inet6_lookup_listener(net,
1052 					   &tcp_hashinfo, NULL, 0,
1053 					   &ipv6h->saddr,
1054 					   th->source, &ipv6h->daddr,
1055 					   ntohs(th->source), dif, sdif);
1056 		if (!sk1)
1057 			goto out;
1058 
1059 		/* sdif set, means packet ingressed via a device
1060 		 * in an L3 domain and dif is set to it.
1061 		 */
1062 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1063 
1064 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1065 		if (!key)
1066 			goto out;
1067 
1068 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1069 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1070 			goto out;
1071 	}
1072 #endif
1073 
1074 	if (th->ack)
1075 		seq = ntohl(th->ack_seq);
1076 	else
1077 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1078 			  (th->doff << 2);
1079 
1080 	if (sk) {
1081 		oif = sk->sk_bound_dev_if;
1082 		if (sk_fullsock(sk)) {
1083 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1084 
1085 			trace_tcp_send_reset(sk, skb);
1086 			if (np->repflow)
1087 				label = ip6_flowlabel(ipv6h);
1088 			priority = sk->sk_priority;
1089 		}
1090 		if (sk->sk_state == TCP_TIME_WAIT) {
1091 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1092 			priority = inet_twsk(sk)->tw_priority;
1093 		}
1094 	} else {
1095 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1096 			label = ip6_flowlabel(ipv6h);
1097 	}
1098 
1099 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1100 			     ipv6_get_dsfield(ipv6h), label, priority);
1101 
1102 #ifdef CONFIG_TCP_MD5SIG
1103 out:
1104 	rcu_read_unlock();
1105 #endif
1106 }
1107 
1108 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1109 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1110 			    struct tcp_md5sig_key *key, u8 tclass,
1111 			    __be32 label, u32 priority)
1112 {
1113 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1114 			     tclass, label, priority);
1115 }
1116 
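/* Answer a segment that hit a TIME_WAIT socket with the appropriate ACK. */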
1117 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1118 {
1119 	struct inet_timewait_sock *tw = inet_twsk(sk);
1120 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1121 
1122 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1123 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1124 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1125 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1126 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1127 
1128 	inet_twsk_put(tw);
1129 }
1130 
1131 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1132 				  struct request_sock *req)
1133 {
1134 	int l3index;
1135 
1136 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1137 
1138 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1139 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1140 	 */
1141 	/* RFC 7323 2.3
1142 	 * The window field (SEG.WND) of every outgoing segment, with the
1143 	 * exception of <SYN> segments, MUST be right-shifted by
1144 	 * Rcv.Wind.Shift bits:
1145 	 */
1146 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1147 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1148 			tcp_rsk(req)->rcv_nxt,
1149 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1150 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1151 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1152 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1153 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1154 }
1155 
1156 
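/* For a non-SYN segment arriving on a listener, let cookie_v6_check()
 * validate a possible SYN cookie (when CONFIG_SYN_COOKIES is enabled); it
 * returns a new child socket, the unchanged listener, or NULL.
 */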
1157 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1158 {
1159 #ifdef CONFIG_SYN_COOKIES
1160 	const struct tcphdr *th = tcp_hdr(skb);
1161 
1162 	if (!th->syn)
1163 		sk = cookie_v6_check(sk, skb);
1164 #endif
1165 	return sk;
1166 }
1167 
1168 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1169 			 struct tcphdr *th, u32 *cookie)
1170 {
1171 	u16 mss = 0;
1172 #ifdef CONFIG_SYN_COOKIES
1173 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1174 				    &tcp_request_sock_ipv6_ops, sk, th);
1175 	if (mss) {
1176 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1177 		tcp_synq_overflow(sk);
1178 	}
1179 #endif
1180 	return mss;
1181 }
1182 
1183 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1184 {
1185 	if (skb->protocol == htons(ETH_P_IP))
1186 		return tcp_v4_conn_request(sk, skb);
1187 
1188 	if (!ipv6_unicast_destination(skb))
1189 		goto drop;
1190 
1191 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1192 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1193 		return 0;
1194 	}
1195 
1196 	return tcp_conn_request(&tcp6_request_sock_ops,
1197 				&tcp_request_sock_ipv6_ops, sk, skb);
1198 
1199 drop:
1200 	tcp_listendrop(sk);
1201 	return 0; /* don't send reset */
1202 }
1203 
1204 static void tcp_v6_restore_cb(struct sk_buff *skb)
1205 {
1206 	/* We need to move header back to the beginning if xfrm6_policy_check()
1207 	 * and tcp_v6_fill_cb() are going to be called again.
1208 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1209 	 */
1210 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1211 		sizeof(struct inet6_skb_parm));
1212 }
1213 
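/* Create the child socket once the handshake completes. The v6-mapped
 * (ETH_P_IP) branch delegates to tcp_v4_syn_recv_sock() and then installs
 * the mapped-IPv6 operations on the new socket.
 */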
1214 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1215 					 struct request_sock *req,
1216 					 struct dst_entry *dst,
1217 					 struct request_sock *req_unhash,
1218 					 bool *own_req)
1219 {
1220 	struct inet_request_sock *ireq;
1221 	struct ipv6_pinfo *newnp;
1222 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1223 	struct ipv6_txoptions *opt;
1224 	struct inet_sock *newinet;
1225 	bool found_dup_sk = false;
1226 	struct tcp_sock *newtp;
1227 	struct sock *newsk;
1228 #ifdef CONFIG_TCP_MD5SIG
1229 	struct tcp_md5sig_key *key;
1230 	int l3index;
1231 #endif
1232 	struct flowi6 fl6;
1233 
1234 	if (skb->protocol == htons(ETH_P_IP)) {
1235 		/*
1236 		 *	v6 mapped
1237 		 */
1238 
1239 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1240 					     req_unhash, own_req);
1241 
1242 		if (!newsk)
1243 			return NULL;
1244 
1245 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1246 
1247 		newinet = inet_sk(newsk);
1248 		newnp = tcp_inet6_sk(newsk);
1249 		newtp = tcp_sk(newsk);
1250 
1251 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1252 
1253 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1254 
1255 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1256 		if (sk_is_mptcp(newsk))
1257 			mptcpv6_handle_mapped(newsk, true);
1258 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1259 #ifdef CONFIG_TCP_MD5SIG
1260 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1261 #endif
1262 
1263 		newnp->ipv6_mc_list = NULL;
1264 		newnp->ipv6_ac_list = NULL;
1265 		newnp->ipv6_fl_list = NULL;
1266 		newnp->pktoptions  = NULL;
1267 		newnp->opt	   = NULL;
1268 		newnp->mcast_oif   = inet_iif(skb);
1269 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1270 		newnp->rcv_flowinfo = 0;
1271 		if (np->repflow)
1272 			newnp->flow_label = 0;
1273 
1274 		/*
1275 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1276 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1277 		 * that function for the gory details. -acme
1278 		 */
1279 
1280 		/* This is a tricky place. Until this moment IPv4 tcp
1281 		   worked with IPv6 icsk.icsk_af_ops.
1282 		   Sync it now.
1283 		 */
1284 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1285 
1286 		return newsk;
1287 	}
1288 
1289 	ireq = inet_rsk(req);
1290 
1291 	if (sk_acceptq_is_full(sk))
1292 		goto out_overflow;
1293 
1294 	if (!dst) {
1295 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1296 		if (!dst)
1297 			goto out;
1298 	}
1299 
1300 	newsk = tcp_create_openreq_child(sk, req, skb);
1301 	if (!newsk)
1302 		goto out_nonewsk;
1303 
1304 	/*
1305 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1306 	 * count here, tcp_create_openreq_child now does this for us, see the
1307 	 * comment in that function for the gory details. -acme
1308 	 */
1309 
1310 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1311 	ip6_dst_store(newsk, dst, NULL, NULL);
1312 	inet6_sk_rx_dst_set(newsk, skb);
1313 
1314 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1315 
1316 	newtp = tcp_sk(newsk);
1317 	newinet = inet_sk(newsk);
1318 	newnp = tcp_inet6_sk(newsk);
1319 
1320 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1321 
1322 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1323 	newnp->saddr = ireq->ir_v6_loc_addr;
1324 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1325 	newsk->sk_bound_dev_if = ireq->ir_iif;
1326 
1327 	/* Now IPv6 options...
1328 
1329 	   First: no IPv4 options.
1330 	 */
1331 	newinet->inet_opt = NULL;
1332 	newnp->ipv6_mc_list = NULL;
1333 	newnp->ipv6_ac_list = NULL;
1334 	newnp->ipv6_fl_list = NULL;
1335 
1336 	/* Clone RX bits */
1337 	newnp->rxopt.all = np->rxopt.all;
1338 
1339 	newnp->pktoptions = NULL;
1340 	newnp->opt	  = NULL;
1341 	newnp->mcast_oif  = tcp_v6_iif(skb);
1342 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1343 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1344 	if (np->repflow)
1345 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1346 
1347 	/* Set ToS of the new socket based upon the value of incoming SYN.
1348 	 * ECT bits are set later in tcp_init_transfer().
1349 	 */
1350 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1351 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1352 
1353 	/* Clone native IPv6 options from listening socket (if any)
1354 
1355 	   Yes, keeping reference count would be much more clever,
1356 	   but we do one more thing here: reattach optmem
1357 	   to newsk.
1358 	 */
1359 	opt = ireq->ipv6_opt;
1360 	if (!opt)
1361 		opt = rcu_dereference(np->opt);
1362 	if (opt) {
1363 		opt = ipv6_dup_options(newsk, opt);
1364 		RCU_INIT_POINTER(newnp->opt, opt);
1365 	}
1366 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1367 	if (opt)
1368 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1369 						    opt->opt_flen;
1370 
1371 	tcp_ca_openreq_child(newsk, dst);
1372 
1373 	tcp_sync_mss(newsk, dst_mtu(dst));
1374 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1375 
1376 	tcp_initialize_rcv_mss(newsk);
1377 
1378 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1379 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1380 
1381 #ifdef CONFIG_TCP_MD5SIG
1382 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1383 
1384 	/* Copy over the MD5 key from the original socket */
1385 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1386 	if (key) {
1387 		/* We're using one, so create a matching key
1388 		 * on the newsk structure. If we fail to get
1389 		 * memory, then we end up not copying the key
1390 		 * across. Shucks.
1391 		 */
1392 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1393 			       AF_INET6, 128, l3index, key->key, key->keylen,
1394 			       sk_gfp_mask(sk, GFP_ATOMIC));
1395 	}
1396 #endif
1397 
1398 	if (__inet_inherit_port(sk, newsk) < 0) {
1399 		inet_csk_prepare_forced_close(newsk);
1400 		tcp_done(newsk);
1401 		goto out;
1402 	}
1403 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1404 				       &found_dup_sk);
1405 	if (*own_req) {
1406 		tcp_move_syn(newtp, req);
1407 
1408 		/* Clone pktoptions received with SYN, if we own the req */
1409 		if (ireq->pktopts) {
1410 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1411 			consume_skb(ireq->pktopts);
1412 			ireq->pktopts = NULL;
1413 			if (newnp->pktoptions)
1414 				tcp_v6_restore_cb(newnp->pktoptions);
1415 		}
1416 	} else {
1417 		if (!req_unhash && found_dup_sk) {
1418 			/* This code path should be executed only in the
1419 			 * syncookie case
1420 			 */
1421 			bh_unlock_sock(newsk);
1422 			sock_put(newsk);
1423 			newsk = NULL;
1424 		}
1425 	}
1426 
1427 	return newsk;
1428 
1429 out_overflow:
1430 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1431 out_nonewsk:
1432 	dst_release(dst);
1433 out:
1434 	tcp_listendrop(sk);
1435 	return NULL;
1436 }
1437 
1438 /* The socket must have its spinlock held when we get
1439  * here, unless it is a TCP_LISTEN socket.
1440  *
1441  * We have a potential double-lock case here, so even when
1442  * doing backlog processing we use the BH locking scheme.
1443  * This is because we cannot sleep with the original spinlock
1444  * held.
1445  */
1446 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1447 {
1448 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1449 	struct sk_buff *opt_skb = NULL;
1450 	struct tcp_sock *tp;
1451 
1452 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1453 	   goes to IPv4 receive handler and backlogged.
1454 	   From backlog it always goes here. Kerboom...
1455 	   Fortunately, tcp_rcv_established and rcv_established
1456 	   handle them correctly, but it is not the case with
1457 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1458 	 */
1459 
1460 	if (skb->protocol == htons(ETH_P_IP))
1461 		return tcp_v4_do_rcv(sk, skb);
1462 
1463 	/*
1464 	 *	socket locking is here for SMP purposes as backlog rcv
1465 	 *	is currently called with bh processing disabled.
1466 	 */
1467 
1468 	/* Do Stevens' IPV6_PKTOPTIONS.
1469 
1470 	   Yes, guys, it is the only place in our code, where we
1471 	   may make it not affecting IPv4.
1472 	   The rest of code is protocol independent,
1473 	   and I do not like idea to uglify IPv4.
1474 
1475 	   Actually, all the idea behind IPV6_PKTOPTIONS
1476 	   looks not very well thought. For now we latch
1477 	   options, received in the last packet, enqueued
1478 	   by tcp. Feel free to propose better solution.
1479 					       --ANK (980728)
1480 	 */
1481 	if (np->rxopt.all)
1482 		opt_skb = skb_clone_and_charge_r(skb, sk);
1483 
1484 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1485 		struct dst_entry *dst;
1486 
1487 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1488 						lockdep_sock_is_held(sk));
1489 
1490 		sock_rps_save_rxhash(sk, skb);
1491 		sk_mark_napi_id(sk, skb);
1492 		if (dst) {
1493 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1494 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1495 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1496 				dst_release(dst);
1497 			}
1498 		}
1499 
1500 		tcp_rcv_established(sk, skb);
1501 		if (opt_skb)
1502 			goto ipv6_pktoptions;
1503 		return 0;
1504 	}
1505 
1506 	if (tcp_checksum_complete(skb))
1507 		goto csum_err;
1508 
1509 	if (sk->sk_state == TCP_LISTEN) {
1510 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1511 
1512 		if (!nsk)
1513 			goto discard;
1514 
1515 		if (nsk != sk) {
1516 			if (tcp_child_process(sk, nsk, skb))
1517 				goto reset;
1518 			if (opt_skb)
1519 				__kfree_skb(opt_skb);
1520 			return 0;
1521 		}
1522 	} else
1523 		sock_rps_save_rxhash(sk, skb);
1524 
1525 	if (tcp_rcv_state_process(sk, skb))
1526 		goto reset;
1527 	if (opt_skb)
1528 		goto ipv6_pktoptions;
1529 	return 0;
1530 
1531 reset:
1532 	tcp_v6_send_reset(sk, skb);
1533 discard:
1534 	if (opt_skb)
1535 		__kfree_skb(opt_skb);
1536 	kfree_skb(skb);
1537 	return 0;
1538 csum_err:
1539 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1540 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1541 	goto discard;
1542 
1543 
1544 ipv6_pktoptions:
1545 	/* Do you ask, what is it?
1546 
1547 	   1. skb was enqueued by tcp.
1548 	   2. skb is added to tail of read queue, rather than out of order.
1549 	   3. socket is not in passive state.
1550 	   4. Finally, it really contains options, which user wants to receive.
1551 	 */
1552 	tp = tcp_sk(sk);
1553 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1554 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1555 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1556 			np->mcast_oif = tcp_v6_iif(opt_skb);
1557 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1558 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1559 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1560 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1561 		if (np->repflow)
1562 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1563 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1564 			tcp_v6_restore_cb(opt_skb);
1565 			opt_skb = xchg(&np->pktoptions, opt_skb);
1566 		} else {
1567 			__kfree_skb(opt_skb);
1568 			opt_skb = xchg(&np->pktoptions, NULL);
1569 		}
1570 	}
1571 
1572 	kfree_skb(opt_skb);
1573 	return 0;
1574 }
1575 
1576 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1577 			   const struct tcphdr *th)
1578 {
1579 	/* This is tricky: we move IP6CB at its correct location into
1580 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1581 	 * _decode_session6() uses IP6CB().
1582 	 * barrier() makes sure compiler won't play aliasing games.
1583 	 */
1584 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1585 		sizeof(struct inet6_skb_parm));
1586 	barrier();
1587 
1588 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1589 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1590 				    skb->len - th->doff*4);
1591 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1592 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1593 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1594 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1595 	TCP_SKB_CB(skb)->sacked = 0;
1596 	TCP_SKB_CB(skb)->has_rxtstamp =
1597 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1598 }
1599 
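/* Main IPv6 TCP receive routine: validate header and checksum, look up the
 * owning socket and dispatch the segment according to its state.
 */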
1600 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1601 {
1602 	struct sk_buff *skb_to_free;
1603 	int sdif = inet6_sdif(skb);
1604 	int dif = inet6_iif(skb);
1605 	const struct tcphdr *th;
1606 	const struct ipv6hdr *hdr;
1607 	bool refcounted;
1608 	struct sock *sk;
1609 	int ret;
1610 	struct net *net = dev_net(skb->dev);
1611 
1612 	if (skb->pkt_type != PACKET_HOST)
1613 		goto discard_it;
1614 
1615 	/*
1616 	 *	Count it even if it's bad.
1617 	 */
1618 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1619 
1620 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1621 		goto discard_it;
1622 
1623 	th = (const struct tcphdr *)skb->data;
1624 
1625 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1626 		goto bad_packet;
1627 	if (!pskb_may_pull(skb, th->doff*4))
1628 		goto discard_it;
1629 
1630 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1631 		goto csum_error;
1632 
1633 	th = (const struct tcphdr *)skb->data;
1634 	hdr = ipv6_hdr(skb);
1635 
1636 lookup:
1637 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1638 				th->source, th->dest, inet6_iif(skb), sdif,
1639 				&refcounted);
1640 	if (!sk)
1641 		goto no_tcp_socket;
1642 
1643 process:
1644 	if (sk->sk_state == TCP_TIME_WAIT)
1645 		goto do_time_wait;
1646 
1647 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1648 		struct request_sock *req = inet_reqsk(sk);
1649 		bool req_stolen = false;
1650 		struct sock *nsk;
1651 
1652 		sk = req->rsk_listener;
1653 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1654 			sk_drops_add(sk, skb);
1655 			reqsk_put(req);
1656 			goto discard_it;
1657 		}
1658 		if (tcp_checksum_complete(skb)) {
1659 			reqsk_put(req);
1660 			goto csum_error;
1661 		}
1662 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1664 			goto lookup;
1665 		}
1666 		sock_hold(sk);
1667 		refcounted = true;
1668 		nsk = NULL;
1669 		if (!tcp_filter(sk, skb)) {
1670 			th = (const struct tcphdr *)skb->data;
1671 			hdr = ipv6_hdr(skb);
1672 			tcp_v6_fill_cb(skb, hdr, th);
1673 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1674 		}
1675 		if (!nsk) {
1676 			reqsk_put(req);
1677 			if (req_stolen) {
1678 				/* Another cpu got exclusive access to req
1679 				 * and created a full blown socket.
1680 				 * Try to feed this packet to this socket
1681 				 * instead of discarding it.
1682 				 */
1683 				tcp_v6_restore_cb(skb);
1684 				sock_put(sk);
1685 				goto lookup;
1686 			}
1687 			goto discard_and_relse;
1688 		}
1689 		if (nsk == sk) {
1690 			reqsk_put(req);
1691 			tcp_v6_restore_cb(skb);
1692 		} else if (tcp_child_process(sk, nsk, skb)) {
1693 			tcp_v6_send_reset(nsk, skb);
1694 			goto discard_and_relse;
1695 		} else {
1696 			sock_put(sk);
1697 			return 0;
1698 		}
1699 	}
1700 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1701 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1702 		goto discard_and_relse;
1703 	}
1704 
1705 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1706 		goto discard_and_relse;
1707 
1708 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1709 		goto discard_and_relse;
1710 
1711 	if (tcp_filter(sk, skb))
1712 		goto discard_and_relse;
1713 	th = (const struct tcphdr *)skb->data;
1714 	hdr = ipv6_hdr(skb);
1715 	tcp_v6_fill_cb(skb, hdr, th);
1716 
1717 	skb->dev = NULL;
1718 
1719 	if (sk->sk_state == TCP_LISTEN) {
1720 		ret = tcp_v6_do_rcv(sk, skb);
1721 		goto put_and_return;
1722 	}
1723 
1724 	sk_incoming_cpu_update(sk);
1725 
1726 	bh_lock_sock_nested(sk);
1727 	tcp_segs_in(tcp_sk(sk), skb);
1728 	ret = 0;
1729 	if (!sock_owned_by_user(sk)) {
1730 		skb_to_free = sk->sk_rx_skb_cache;
1731 		sk->sk_rx_skb_cache = NULL;
1732 		ret = tcp_v6_do_rcv(sk, skb);
1733 	} else {
1734 		if (tcp_add_backlog(sk, skb))
1735 			goto discard_and_relse;
1736 		skb_to_free = NULL;
1737 	}
1738 	bh_unlock_sock(sk);
1739 	if (skb_to_free)
1740 		__kfree_skb(skb_to_free);
1741 put_and_return:
1742 	if (refcounted)
1743 		sock_put(sk);
1744 	return ret ? -1 : 0;
1745 
1746 no_tcp_socket:
1747 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1748 		goto discard_it;
1749 
1750 	tcp_v6_fill_cb(skb, hdr, th);
1751 
1752 	if (tcp_checksum_complete(skb)) {
1753 csum_error:
1754 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1755 bad_packet:
1756 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1757 	} else {
1758 		tcp_v6_send_reset(NULL, skb);
1759 	}
1760 
1761 discard_it:
1762 	kfree_skb(skb);
1763 	return 0;
1764 
1765 discard_and_relse:
1766 	sk_drops_add(sk, skb);
1767 	if (refcounted)
1768 		sock_put(sk);
1769 	goto discard_it;
1770 
1771 do_time_wait:
1772 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1773 		inet_twsk_put(inet_twsk(sk));
1774 		goto discard_it;
1775 	}
1776 
1777 	tcp_v6_fill_cb(skb, hdr, th);
1778 
1779 	if (tcp_checksum_complete(skb)) {
1780 		inet_twsk_put(inet_twsk(sk));
1781 		goto csum_error;
1782 	}
1783 
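	/* TCP_TW_SYN means this SYN may legitimately reuse the old 4-tuple:
	 * look for a current listener and, if one exists, drop the
	 * timewait socket and process the SYN against that listener.
	 */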
1784 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1785 	case TCP_TW_SYN:
1786 	{
1787 		struct sock *sk2;
1788 
1789 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1790 					    skb, __tcp_hdrlen(th),
1791 					    &ipv6_hdr(skb)->saddr, th->source,
1792 					    &ipv6_hdr(skb)->daddr,
1793 					    ntohs(th->dest),
1794 					    tcp_v6_iif_l3_slave(skb),
1795 					    sdif);
1796 		if (sk2) {
1797 			struct inet_timewait_sock *tw = inet_twsk(sk);
1798 			inet_twsk_deschedule_put(tw);
1799 			sk = sk2;
1800 			tcp_v6_restore_cb(skb);
1801 			refcounted = false;
1802 			goto process;
1803 		}
1804 	}
1805 		/* to ACK */
1806 		fallthrough;
1807 	case TCP_TW_ACK:
1808 		tcp_v6_timewait_ack(sk, skb);
1809 		break;
1810 	case TCP_TW_RST:
1811 		tcp_v6_send_reset(sk, skb);
1812 		inet_twsk_deschedule_put(inet_twsk(sk));
1813 		goto discard_it;
1814 	case TCP_TW_SUCCESS:
1815 		;
1816 	}
1817 	goto discard_it;
1818 }
1819 
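/* Early demux: look up an established socket before routing, so a dst
 * cached on that socket can be attached to the skb and the routing
 * lookup can be skipped.
 */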
1820 void tcp_v6_early_demux(struct sk_buff *skb)
1821 {
1822 	const struct ipv6hdr *hdr;
1823 	const struct tcphdr *th;
1824 	struct sock *sk;
1825 
1826 	if (skb->pkt_type != PACKET_HOST)
1827 		return;
1828 
1829 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1830 		return;
1831 
1832 	hdr = ipv6_hdr(skb);
1833 	th = tcp_hdr(skb);
1834 
1835 	if (th->doff < sizeof(struct tcphdr) / 4)
1836 		return;
1837 
1838 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
1839 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1840 					&hdr->saddr, th->source,
1841 					&hdr->daddr, ntohs(th->dest),
1842 					inet6_iif(skb), inet6_sdif(skb));
1843 	if (sk) {
1844 		skb->sk = sk;
1845 		skb->destructor = sock_edemux;
1846 		if (sk_fullsock(sk)) {
1847 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1848 
1849 			if (dst)
1850 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1851 			if (dst &&
1852 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1853 				skb_dst_set_noref(skb, dst);
1854 		}
1855 	}
1856 }
1857 
1858 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1859 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1860 	.twsk_unique	= tcp_twsk_unique,
1861 	.twsk_destructor = tcp_twsk_destructor,
1862 };
1863 
1864 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1865 {
1866 	struct ipv6_pinfo *np = inet6_sk(sk);
1867 
1868 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1869 }
1870 
1871 const struct inet_connection_sock_af_ops ipv6_specific = {
1872 	.queue_xmit	   = inet6_csk_xmit,
1873 	.send_check	   = tcp_v6_send_check,
1874 	.rebuild_header	   = inet6_sk_rebuild_header,
1875 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1876 	.conn_request	   = tcp_v6_conn_request,
1877 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1878 	.net_header_len	   = sizeof(struct ipv6hdr),
1879 	.net_frag_header_len = sizeof(struct frag_hdr),
1880 	.setsockopt	   = ipv6_setsockopt,
1881 	.getsockopt	   = ipv6_getsockopt,
1882 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1883 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1884 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1885 };
1886 
1887 #ifdef CONFIG_TCP_MD5SIG
1888 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1889 	.md5_lookup	=	tcp_v6_md5_lookup,
1890 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1891 	.md5_parse	=	tcp_v6_parse_md5_keys,
1892 };
1893 #endif
1894 
1895 /*
1896  *	TCP over IPv4 via INET6 API
1897  */
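/* These ops back an AF_INET6 socket that is actually carrying IPv4
 * traffic (an IPv4-mapped peer); the IPv4 transmit and header helpers
 * are used while the sockaddr interface stays IPv6.
 */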
1898 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1899 	.queue_xmit	   = ip_queue_xmit,
1900 	.send_check	   = tcp_v4_send_check,
1901 	.rebuild_header	   = inet_sk_rebuild_header,
1902 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1903 	.conn_request	   = tcp_v6_conn_request,
1904 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1905 	.net_header_len	   = sizeof(struct iphdr),
1906 	.setsockopt	   = ipv6_setsockopt,
1907 	.getsockopt	   = ipv6_getsockopt,
1908 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1909 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1910 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1911 };
1912 
1913 #ifdef CONFIG_TCP_MD5SIG
1914 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1915 	.md5_lookup	=	tcp_v4_md5_lookup,
1916 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1917 	.md5_parse	=	tcp_v6_parse_md5_keys,
1918 };
1919 #endif
1920 
1921 /* NOTE: Many fields are already zeroed by the call to sk_alloc(),
1922  *       so they need not be initialized here.
1923  */
1924 static int tcp_v6_init_sock(struct sock *sk)
1925 {
1926 	struct inet_connection_sock *icsk = inet_csk(sk);
1927 
1928 	tcp_init_sock(sk);
1929 
1930 	icsk->icsk_af_ops = &ipv6_specific;
1931 
1932 #ifdef CONFIG_TCP_MD5SIG
1933 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1934 #endif
1935 
1936 	return 0;
1937 }
1938 
1939 #ifdef CONFIG_PROC_FS
1940 /* Proc filesystem TCPv6 sock list dumping. */
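/* The helpers below each print one /proc/net/tcp6 line: get_openreq6()
 * for pending connection requests, get_tcp6_sock() for full sockets and
 * get_timewait6_sock() for TIME_WAIT entries.  Addresses are printed as
 * four raw 32-bit words in hex, ports in hex as well.
 */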
1941 static void get_openreq6(struct seq_file *seq,
1942 			 const struct request_sock *req, int i)
1943 {
1944 	long ttd = req->rsk_timer.expires - jiffies;
1945 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1946 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1947 
1948 	if (ttd < 0)
1949 		ttd = 0;
1950 
1951 	seq_printf(seq,
1952 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1953 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1954 		   i,
1955 		   src->s6_addr32[0], src->s6_addr32[1],
1956 		   src->s6_addr32[2], src->s6_addr32[3],
1957 		   inet_rsk(req)->ir_num,
1958 		   dest->s6_addr32[0], dest->s6_addr32[1],
1959 		   dest->s6_addr32[2], dest->s6_addr32[3],
1960 		   ntohs(inet_rsk(req)->ir_rmt_port),
1961 		   TCP_SYN_RECV,
1962 		   0, 0, /* could print option size, but that is af dependent. */
1963 		   1,   /* timers active (only the expire timer) */
1964 		   jiffies_to_clock_t(ttd),
1965 		   req->num_timeout,
1966 		   from_kuid_munged(seq_user_ns(seq),
1967 				    sock_i_uid(req->rsk_listener)),
1968 		   0,  /* non standard timer */
1969 		   0, /* open_requests have no inode */
1970 		   0, req);
1971 }
1972 
1973 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1974 {
1975 	const struct in6_addr *dest, *src;
1976 	__u16 destp, srcp;
1977 	int timer_active;
1978 	unsigned long timer_expires;
1979 	const struct inet_sock *inet = inet_sk(sp);
1980 	const struct tcp_sock *tp = tcp_sk(sp);
1981 	const struct inet_connection_sock *icsk = inet_csk(sp);
1982 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1983 	int rx_queue;
1984 	int state;
1985 
1986 	dest  = &sp->sk_v6_daddr;
1987 	src   = &sp->sk_v6_rcv_saddr;
1988 	destp = ntohs(inet->inet_dport);
1989 	srcp  = ntohs(inet->inet_sport);
1990 
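	/* "tr" codes in the proc output: 0 no timer, 1 retransmit/probe
	 * timers, 2 keepalive (sk_timer), 4 zero-window probe; 3 is used
	 * for TIME_WAIT entries (printed by get_timewait6_sock()).
	 */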
1991 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1992 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1993 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1994 		timer_active	= 1;
1995 		timer_expires	= icsk->icsk_timeout;
1996 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1997 		timer_active	= 4;
1998 		timer_expires	= icsk->icsk_timeout;
1999 	} else if (timer_pending(&sp->sk_timer)) {
2000 		timer_active	= 2;
2001 		timer_expires	= sp->sk_timer.expires;
2002 	} else {
2003 		timer_active	= 0;
2004 		timer_expires = jiffies;
2005 	}
2006 
2007 	state = inet_sk_state_load(sp);
2008 	if (state == TCP_LISTEN)
2009 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2010 	else
2011 		/* Because we don't lock the socket,
2012 		 * we might find a transient negative value.
2013 		 */
2014 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2015 				      READ_ONCE(tp->copied_seq), 0);
2016 
2017 	seq_printf(seq,
2018 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2019 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2020 		   i,
2021 		   src->s6_addr32[0], src->s6_addr32[1],
2022 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2023 		   dest->s6_addr32[0], dest->s6_addr32[1],
2024 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2025 		   state,
2026 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2027 		   rx_queue,
2028 		   timer_active,
2029 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2030 		   icsk->icsk_retransmits,
2031 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2032 		   icsk->icsk_probes_out,
2033 		   sock_i_ino(sp),
2034 		   refcount_read(&sp->sk_refcnt), sp,
2035 		   jiffies_to_clock_t(icsk->icsk_rto),
2036 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2037 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2038 		   tp->snd_cwnd,
2039 		   state == TCP_LISTEN ?
2040 			fastopenq->max_qlen :
2041 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2042 		   );
2043 }
2044 
2045 static void get_timewait6_sock(struct seq_file *seq,
2046 			       struct inet_timewait_sock *tw, int i)
2047 {
2048 	long delta = tw->tw_timer.expires - jiffies;
2049 	const struct in6_addr *dest, *src;
2050 	__u16 destp, srcp;
2051 
2052 	dest = &tw->tw_v6_daddr;
2053 	src  = &tw->tw_v6_rcv_saddr;
2054 	destp = ntohs(tw->tw_dport);
2055 	srcp  = ntohs(tw->tw_sport);
2056 
2057 	seq_printf(seq,
2058 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2059 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2060 		   i,
2061 		   src->s6_addr32[0], src->s6_addr32[1],
2062 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2063 		   dest->s6_addr32[0], dest->s6_addr32[1],
2064 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2065 		   tw->tw_substate, 0, 0,
2066 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2067 		   refcount_read(&tw->tw_refcnt), tw);
2068 }
2069 
2070 static int tcp6_seq_show(struct seq_file *seq, void *v)
2071 {
2072 	struct tcp_iter_state *st;
2073 	struct sock *sk = v;
2074 
2075 	if (v == SEQ_START_TOKEN) {
2076 		seq_puts(seq,
2077 			 "  sl  "
2078 			 "local_address                         "
2079 			 "remote_address                        "
2080 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2081 			 "   uid  timeout inode\n");
2082 		goto out;
2083 	}
2084 	st = seq->private;
2085 
2086 	if (sk->sk_state == TCP_TIME_WAIT)
2087 		get_timewait6_sock(seq, v, st->num);
2088 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2089 		get_openreq6(seq, v, st->num);
2090 	else
2091 		get_tcp6_sock(seq, v, st->num);
2092 out:
2093 	return 0;
2094 }
2095 
2096 static const struct seq_operations tcp6_seq_ops = {
2097 	.show		= tcp6_seq_show,
2098 	.start		= tcp_seq_start,
2099 	.next		= tcp_seq_next,
2100 	.stop		= tcp_seq_stop,
2101 };
2102 
2103 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2104 	.family		= AF_INET6,
2105 };
2106 
2107 int __net_init tcp6_proc_init(struct net *net)
2108 {
2109 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2110 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2111 		return -ENOMEM;
2112 	return 0;
2113 }
2114 
2115 void tcp6_proc_exit(struct net *net)
2116 {
2117 	remove_proc_entry("tcp6", net->proc_net);
2118 }
2119 #endif
2120 
2121 struct proto tcpv6_prot = {
2122 	.name			= "TCPv6",
2123 	.owner			= THIS_MODULE,
2124 	.close			= tcp_close,
2125 	.pre_connect		= tcp_v6_pre_connect,
2126 	.connect		= tcp_v6_connect,
2127 	.disconnect		= tcp_disconnect,
2128 	.accept			= inet_csk_accept,
2129 	.ioctl			= tcp_ioctl,
2130 	.init			= tcp_v6_init_sock,
2131 	.destroy		= tcp_v4_destroy_sock,
2132 	.shutdown		= tcp_shutdown,
2133 	.setsockopt		= tcp_setsockopt,
2134 	.getsockopt		= tcp_getsockopt,
2135 	.keepalive		= tcp_set_keepalive,
2136 	.recvmsg		= tcp_recvmsg,
2137 	.sendmsg		= tcp_sendmsg,
2138 	.sendpage		= tcp_sendpage,
2139 	.backlog_rcv		= tcp_v6_do_rcv,
2140 	.release_cb		= tcp_release_cb,
2141 	.hash			= inet6_hash,
2142 	.unhash			= inet_unhash,
2143 	.get_port		= inet_csk_get_port,
2144 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2145 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2146 	.stream_memory_free	= tcp_stream_memory_free,
2147 	.sockets_allocated	= &tcp_sockets_allocated,
2148 	.memory_allocated	= &tcp_memory_allocated,
2149 	.memory_pressure	= &tcp_memory_pressure,
2150 	.orphan_count		= &tcp_orphan_count,
2151 	.sysctl_mem		= sysctl_tcp_mem,
2152 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2153 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2154 	.max_header		= MAX_TCP_HEADER,
2155 	.obj_size		= sizeof(struct tcp6_sock),
2156 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2157 	.twsk_prot		= &tcp6_timewait_sock_ops,
2158 	.rsk_prot		= &tcp6_request_sock_ops,
2159 	.h.hashinfo		= &tcp_hashinfo,
2160 	.no_autobind		= true,
2161 	.diag_destroy		= tcp_abort,
2162 };
2163 EXPORT_SYMBOL_GPL(tcpv6_prot);
2164 
2165 static const struct inet6_protocol tcpv6_protocol = {
2166 	.handler	=	tcp_v6_rcv,
2167 	.err_handler	=	tcp_v6_err,
2168 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2169 };
2170 
2171 static struct inet_protosw tcpv6_protosw = {
2172 	.type		=	SOCK_STREAM,
2173 	.protocol	=	IPPROTO_TCP,
2174 	.prot		=	&tcpv6_prot,
2175 	.ops		=	&inet6_stream_ops,
2176 	.flags		=	INET_PROTOSW_PERMANENT |
2177 				INET_PROTOSW_ICSK,
2178 };
2179 
2180 static int __net_init tcpv6_net_init(struct net *net)
2181 {
2182 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2183 				    SOCK_RAW, IPPROTO_TCP, net);
2184 }
2185 
2186 static void __net_exit tcpv6_net_exit(struct net *net)
2187 {
2188 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2189 }
2190 
2191 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2192 {
2193 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2194 }
2195 
2196 static struct pernet_operations tcpv6_net_ops = {
2197 	.init	    = tcpv6_net_init,
2198 	.exit	    = tcpv6_net_exit,
2199 	.exit_batch = tcpv6_net_exit_batch,
2200 };
2201 
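/* Register, in order: the IPPROTO_TCP input handler, the SOCK_STREAM
 * protosw, the per-netns control socket and MPTCP's IPv6 glue; on any
 * failure the previous steps are unwound in reverse order.
 */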
2202 int __init tcpv6_init(void)
2203 {
2204 	int ret;
2205 
2206 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2207 	if (ret)
2208 		goto out;
2209 
2210 	/* register inet6 protocol */
2211 	ret = inet6_register_protosw(&tcpv6_protosw);
2212 	if (ret)
2213 		goto out_tcpv6_protocol;
2214 
2215 	ret = register_pernet_subsys(&tcpv6_net_ops);
2216 	if (ret)
2217 		goto out_tcpv6_protosw;
2218 
2219 	ret = mptcpv6_init();
2220 	if (ret)
2221 		goto out_tcpv6_pernet_subsys;
2222 
2223 out:
2224 	return ret;
2225 
2226 out_tcpv6_pernet_subsys:
2227 	unregister_pernet_subsys(&tcpv6_net_ops);
2228 out_tcpv6_protosw:
2229 	inet6_unregister_protosw(&tcpv6_protosw);
2230 out_tcpv6_protocol:
2231 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2232 	goto out;
2233 }
2234 
2235 void tcpv6_exit(void)
2236 {
2237 	unregister_pernet_subsys(&tcpv6_net_ops);
2238 	inet6_unregister_protosw(&tcpv6_protosw);
2239 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2240 }
2241