1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					to a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the ipv6_pinfo of a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
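/* The fixed offset above works because struct tcp6_sock keeps its
 * ipv6_pinfo as the last member, so the pinfo address can be computed
 * directly from the socket pointer instead of loading
 * inet_sk(sk)->pinet6 first.
 */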
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		sk->sk_rx_dst = dst;
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent the BPF program called below from accessing bytes that are out
135 	 * of the bounds specified by the user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
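/* Active open: validates the destination address, handles v4-mapped
 * destinations by switching the af_ops to ipv6_mapped and delegating to
 * tcp_v4_connect(), and otherwise routes the flow, picks a source address,
 * hashes the socket and sends the SYN via tcp_connect() (unless TCP Fast
 * Open defers it).
 */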
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connecting to a link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242 		if (sk_is_mptcp(sk))
243 			mptcpv6_handle_mapped(sk, true);
244 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248 
249 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250 
251 		if (err) {
252 			icsk->icsk_ext_hdr_len = exthdrlen;
253 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255 			if (sk_is_mptcp(sk))
256 				mptcpv6_handle_mapped(sk, false);
257 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
258 #ifdef CONFIG_TCP_MD5SIG
259 			tp->af_specific = &tcp_sock_ipv6_specific;
260 #endif
261 			goto failure;
262 		}
263 		np->saddr = sk->sk_v6_rcv_saddr;
264 
265 		return err;
266 	}
267 
268 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 		saddr = &sk->sk_v6_rcv_saddr;
270 
271 	fl6.flowi6_proto = IPPROTO_TCP;
272 	fl6.daddr = sk->sk_v6_daddr;
273 	fl6.saddr = saddr ? *saddr : np->saddr;
274 	fl6.flowi6_oif = sk->sk_bound_dev_if;
275 	fl6.flowi6_mark = sk->sk_mark;
276 	fl6.fl6_dport = usin->sin6_port;
277 	fl6.fl6_sport = inet->inet_sport;
278 	fl6.flowi6_uid = sk->sk_uid;
279 
280 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
281 	final_p = fl6_update_dst(&fl6, opt, &final);
282 
283 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
284 
285 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
286 	if (IS_ERR(dst)) {
287 		err = PTR_ERR(dst);
288 		goto failure;
289 	}
290 
291 	if (!saddr) {
292 		saddr = &fl6.saddr;
293 		sk->sk_v6_rcv_saddr = *saddr;
294 	}
295 
296 	/* set the source address */
297 	np->saddr = *saddr;
298 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
299 
300 	sk->sk_gso_type = SKB_GSO_TCPV6;
301 	ip6_dst_store(sk, dst, NULL, NULL);
302 
303 	icsk->icsk_ext_hdr_len = 0;
304 	if (opt)
305 		icsk->icsk_ext_hdr_len = opt->opt_flen +
306 					 opt->opt_nflen;
307 
308 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
309 
310 	inet->inet_dport = usin->sin6_port;
311 
312 	tcp_set_state(sk, TCP_SYN_SENT);
313 	err = inet6_hash_connect(tcp_death_row, sk);
314 	if (err)
315 		goto late_failure;
316 
317 	sk_set_txhash(sk);
318 
319 	if (likely(!tp->repair)) {
320 		if (!tp->write_seq)
321 			WRITE_ONCE(tp->write_seq,
322 				   secure_tcpv6_seq(np->saddr.s6_addr32,
323 						    sk->sk_v6_daddr.s6_addr32,
324 						    inet->inet_sport,
325 						    inet->inet_dport));
326 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
327 						   np->saddr.s6_addr32,
328 						   sk->sk_v6_daddr.s6_addr32);
329 	}
330 
331 	if (tcp_fastopen_defer_connect(sk, &err))
332 		return err;
333 	if (err)
334 		goto late_failure;
335 
336 	err = tcp_connect(sk);
337 	if (err)
338 		goto late_failure;
339 
340 	return 0;
341 
342 late_failure:
343 	tcp_set_state(sk, TCP_CLOSE);
344 failure:
345 	inet->inet_dport = 0;
346 	sk->sk_route_caps = 0;
347 	return err;
348 }
349 
350 static void tcp_v6_mtu_reduced(struct sock *sk)
351 {
352 	struct dst_entry *dst;
353 	u32 mtu;
354 
355 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
356 		return;
357 
358 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
359 
360 	/* Drop requests trying to increase our current mss.
361 	 * The check done in __ip6_rt_update_pmtu() is too late.
362 	 */
363 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
364 		return;
365 
366 	dst = inet6_csk_update_pmtu(sk, mtu);
367 	if (!dst)
368 		return;
369 
370 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
371 		tcp_sync_mss(sk, dst_mtu(dst));
372 		tcp_simple_retransmit(sk);
373 	}
374 }
375 
376 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
377 		u8 type, u8 code, int offset, __be32 info)
378 {
379 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
380 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
381 	struct net *net = dev_net(skb->dev);
382 	struct request_sock *fastopen;
383 	struct ipv6_pinfo *np;
384 	struct tcp_sock *tp;
385 	__u32 seq, snd_una;
386 	struct sock *sk;
387 	bool fatal;
388 	int err;
389 
390 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
391 					&hdr->daddr, th->dest,
392 					&hdr->saddr, ntohs(th->source),
393 					skb->dev->ifindex, inet6_sdif(skb));
394 
395 	if (!sk) {
396 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
397 				  ICMP6_MIB_INERRORS);
398 		return -ENOENT;
399 	}
400 
401 	if (sk->sk_state == TCP_TIME_WAIT) {
402 		inet_twsk_put(inet_twsk(sk));
403 		return 0;
404 	}
405 	seq = ntohl(th->seq);
406 	fatal = icmpv6_err_convert(type, code, &err);
407 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
408 		tcp_req_err(sk, seq, fatal);
409 		return 0;
410 	}
411 
412 	bh_lock_sock(sk);
413 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
414 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
415 
416 	if (sk->sk_state == TCP_CLOSE)
417 		goto out;
418 
419 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
420 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
421 		goto out;
422 	}
423 
424 	tp = tcp_sk(sk);
425 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
426 	fastopen = rcu_dereference(tp->fastopen_rsk);
427 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
428 	if (sk->sk_state != TCP_LISTEN &&
429 	    !between(seq, snd_una, tp->snd_nxt)) {
430 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
431 		goto out;
432 	}
433 
434 	np = tcp_inet6_sk(sk);
435 
436 	if (type == NDISC_REDIRECT) {
437 		if (!sock_owned_by_user(sk)) {
438 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
439 
440 			if (dst)
441 				dst->ops->redirect(dst, sk, skb);
442 		}
443 		goto out;
444 	}
445 
446 	if (type == ICMPV6_PKT_TOOBIG) {
447 		u32 mtu = ntohl(info);
448 
449 		/* We are not interested in TCP_LISTEN and open_requests
450 		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
451 		 * they should go through unfragmented).
452 		 */
453 		if (sk->sk_state == TCP_LISTEN)
454 			goto out;
455 
456 		if (!ip6_sk_accept_pmtu(sk))
457 			goto out;
458 
459 		if (mtu < IPV6_MIN_MTU)
460 			goto out;
461 
462 		WRITE_ONCE(tp->mtu_info, mtu);
463 
464 		if (!sock_owned_by_user(sk))
465 			tcp_v6_mtu_reduced(sk);
466 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
467 					   &sk->sk_tsq_flags))
468 			sock_hold(sk);
469 		goto out;
470 	}
471 
472 
473 	/* Might be for a request_sock */
474 	switch (sk->sk_state) {
475 	case TCP_SYN_SENT:
476 	case TCP_SYN_RECV:
477 		/* Only in fast or simultaneous open. If a fast open socket is
478 		 * already accepted it is treated as a connected one below.
479 		 */
480 		if (fastopen && !fastopen->sk)
481 			break;
482 
483 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
484 
485 		if (!sock_owned_by_user(sk)) {
486 			sk->sk_err = err;
487 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
488 
489 			tcp_done(sk);
490 		} else
491 			sk->sk_err_soft = err;
492 		goto out;
493 	case TCP_LISTEN:
494 		break;
495 	default:
496 		/* check if this ICMP message allows reverting the backoff.
497 		 * (see RFC 6069)
498 		 */
499 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
500 		    code == ICMPV6_NOROUTE)
501 			tcp_ld_RTO_revert(sk, seq);
502 	}
503 
504 	if (!sock_owned_by_user(sk) && np->recverr) {
505 		sk->sk_err = err;
506 		sk->sk_error_report(sk);
507 	} else
508 		sk->sk_err_soft = err;
509 
510 out:
511 	bh_unlock_sock(sk);
512 	sock_put(sk);
513 	return 0;
514 }
515 
516 
517 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
518 			      struct flowi *fl,
519 			      struct request_sock *req,
520 			      struct tcp_fastopen_cookie *foc,
521 			      enum tcp_synack_type synack_type,
522 			      struct sk_buff *syn_skb)
523 {
524 	struct inet_request_sock *ireq = inet_rsk(req);
525 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
526 	struct ipv6_txoptions *opt;
527 	struct flowi6 *fl6 = &fl->u.ip6;
528 	struct sk_buff *skb;
529 	int err = -ENOMEM;
530 	u8 tclass;
531 
532 	/* First, grab a route. */
533 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
534 					       IPPROTO_TCP)) == NULL)
535 		goto done;
536 
537 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
538 
539 	if (skb) {
540 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
541 				    &ireq->ir_v6_rmt_addr);
542 
543 		fl6->daddr = ireq->ir_v6_rmt_addr;
544 		if (np->repflow && ireq->pktopts)
545 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
546 
547 		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
548 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
549 				(np->tclass & INET_ECN_MASK) :
550 				np->tclass;
551 
552 		if (!INET_ECN_is_capable(tclass) &&
553 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
554 			tclass |= INET_ECN_ECT_0;
555 
556 		rcu_read_lock();
557 		opt = ireq->ipv6_opt;
558 		if (!opt)
559 			opt = rcu_dereference(np->opt);
560 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
561 			       tclass, sk->sk_priority);
562 		rcu_read_unlock();
563 		err = net_xmit_eval(err);
564 	}
565 
566 done:
567 	return err;
568 }
569 
570 
571 static void tcp_v6_reqsk_destructor(struct request_sock *req)
572 {
573 	kfree(inet_rsk(req)->ipv6_opt);
574 	kfree_skb(inet_rsk(req)->pktopts);
575 }
576 
577 #ifdef CONFIG_TCP_MD5SIG
578 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
579 						   const struct in6_addr *addr,
580 						   int l3index)
581 {
582 	return tcp_md5_do_lookup(sk, l3index,
583 				 (union tcp_md5_addr *)addr, AF_INET6);
584 }
585 
586 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
587 						const struct sock *addr_sk)
588 {
589 	int l3index;
590 
591 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
592 						 addr_sk->sk_bound_dev_if);
593 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
594 				    l3index);
595 }
596 
597 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
598 				 sockptr_t optval, int optlen)
599 {
600 	struct tcp_md5sig cmd;
601 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
602 	int l3index = 0;
603 	u8 prefixlen;
604 
605 	if (optlen < sizeof(cmd))
606 		return -EINVAL;
607 
608 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
609 		return -EFAULT;
610 
611 	if (sin6->sin6_family != AF_INET6)
612 		return -EINVAL;
613 
614 	if (optname == TCP_MD5SIG_EXT &&
615 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
616 		prefixlen = cmd.tcpm_prefixlen;
617 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
618 					prefixlen > 32))
619 			return -EINVAL;
620 	} else {
621 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
622 	}
623 
624 	if (optname == TCP_MD5SIG_EXT &&
625 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
626 		struct net_device *dev;
627 
628 		rcu_read_lock();
629 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
630 		if (dev && netif_is_l3_master(dev))
631 			l3index = dev->ifindex;
632 		rcu_read_unlock();
633 
634 		/* ok to reference set/not set outside of rcu;
635 		 * right now device MUST be an L3 master
636 		 */
637 		if (!dev || !l3index)
638 			return -EINVAL;
639 	}
640 
641 	if (!cmd.tcpm_keylen) {
642 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
643 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
644 					      AF_INET, prefixlen,
645 					      l3index);
646 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
647 				      AF_INET6, prefixlen, l3index);
648 	}
649 
650 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
651 		return -EINVAL;
652 
653 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
654 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
655 				      AF_INET, prefixlen, l3index,
656 				      cmd.tcpm_key, cmd.tcpm_keylen,
657 				      GFP_KERNEL);
658 
659 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
660 			      AF_INET6, prefixlen, l3index,
661 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
662 }
663 
664 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
665 				   const struct in6_addr *daddr,
666 				   const struct in6_addr *saddr,
667 				   const struct tcphdr *th, int nbytes)
668 {
669 	struct tcp6_pseudohdr *bp;
670 	struct scatterlist sg;
671 	struct tcphdr *_th;
672 
673 	bp = hp->scratch;
674 	/* 1. TCP pseudo-header (RFC2460) */
675 	bp->saddr = *saddr;
676 	bp->daddr = *daddr;
677 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
678 	bp->len = cpu_to_be32(nbytes);
679 
680 	_th = (struct tcphdr *)(bp + 1);
681 	memcpy(_th, th, sizeof(*th));
682 	_th->check = 0;
683 
684 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
685 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
686 				sizeof(*bp) + sizeof(*th));
687 	return crypto_ahash_update(hp->md5_req);
688 }
689 
690 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
691 			       const struct in6_addr *daddr, struct in6_addr *saddr,
692 			       const struct tcphdr *th)
693 {
694 	struct tcp_md5sig_pool *hp;
695 	struct ahash_request *req;
696 
697 	hp = tcp_get_md5sig_pool();
698 	if (!hp)
699 		goto clear_hash_noput;
700 	req = hp->md5_req;
701 
702 	if (crypto_ahash_init(req))
703 		goto clear_hash;
704 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
705 		goto clear_hash;
706 	if (tcp_md5_hash_key(hp, key))
707 		goto clear_hash;
708 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
709 	if (crypto_ahash_final(req))
710 		goto clear_hash;
711 
712 	tcp_put_md5sig_pool();
713 	return 0;
714 
715 clear_hash:
716 	tcp_put_md5sig_pool();
717 clear_hash_noput:
718 	memset(md5_hash, 0, 16);
719 	return 1;
720 }
721 
722 static int tcp_v6_md5_hash_skb(char *md5_hash,
723 			       const struct tcp_md5sig_key *key,
724 			       const struct sock *sk,
725 			       const struct sk_buff *skb)
726 {
727 	const struct in6_addr *saddr, *daddr;
728 	struct tcp_md5sig_pool *hp;
729 	struct ahash_request *req;
730 	const struct tcphdr *th = tcp_hdr(skb);
731 
732 	if (sk) { /* valid for establish/request sockets */
733 		saddr = &sk->sk_v6_rcv_saddr;
734 		daddr = &sk->sk_v6_daddr;
735 	} else {
736 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
737 		saddr = &ip6h->saddr;
738 		daddr = &ip6h->daddr;
739 	}
740 
741 	hp = tcp_get_md5sig_pool();
742 	if (!hp)
743 		goto clear_hash_noput;
744 	req = hp->md5_req;
745 
746 	if (crypto_ahash_init(req))
747 		goto clear_hash;
748 
749 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
750 		goto clear_hash;
751 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
752 		goto clear_hash;
753 	if (tcp_md5_hash_key(hp, key))
754 		goto clear_hash;
755 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
756 	if (crypto_ahash_final(req))
757 		goto clear_hash;
758 
759 	tcp_put_md5sig_pool();
760 	return 0;
761 
762 clear_hash:
763 	tcp_put_md5sig_pool();
764 clear_hash_noput:
765 	memset(md5_hash, 0, 16);
766 	return 1;
767 }
768 
769 #endif
770 
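/* Returns true when the segment must be dropped: an MD5 key is configured
 * but the segment carries no MD5 option, an unexpected MD5 option is
 * present, or the recomputed digest does not match.  "No key and no option"
 * and a matching digest both let the segment pass (return false).
 */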
771 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
772 				    const struct sk_buff *skb,
773 				    int dif, int sdif)
774 {
775 #ifdef CONFIG_TCP_MD5SIG
776 	const __u8 *hash_location = NULL;
777 	struct tcp_md5sig_key *hash_expected;
778 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
779 	const struct tcphdr *th = tcp_hdr(skb);
780 	int genhash, l3index;
781 	u8 newhash[16];
782 
783 	/* sdif set, means packet ingressed via a device
784 	 * in an L3 domain and dif is set to the l3mdev
785 	 */
786 	l3index = sdif ? dif : 0;
787 
788 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
789 	hash_location = tcp_parse_md5sig_option(th);
790 
791 	/* We've parsed the options - do we have a hash? */
792 	if (!hash_expected && !hash_location)
793 		return false;
794 
795 	if (hash_expected && !hash_location) {
796 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
797 		return true;
798 	}
799 
800 	if (!hash_expected && hash_location) {
801 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
802 		return true;
803 	}
804 
805 	/* check the signature */
806 	genhash = tcp_v6_md5_hash_skb(newhash,
807 				      hash_expected,
808 				      NULL, skb);
809 
810 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
811 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
812 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
813 				     genhash ? "failed" : "mismatch",
814 				     &ip6h->saddr, ntohs(th->source),
815 				     &ip6h->daddr, ntohs(th->dest), l3index);
816 		return true;
817 	}
818 #endif
819 	return false;
820 }
821 
822 static void tcp_v6_init_req(struct request_sock *req,
823 			    const struct sock *sk_listener,
824 			    struct sk_buff *skb)
825 {
826 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
827 	struct inet_request_sock *ireq = inet_rsk(req);
828 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
829 
830 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
831 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
832 
833 	/* So that link locals have meaning */
834 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
835 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
836 		ireq->ir_iif = tcp_v6_iif(skb);
837 
838 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
839 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
840 	     np->rxopt.bits.rxinfo ||
841 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
842 	     np->rxopt.bits.rxohlim || np->repflow)) {
843 		refcount_inc(&skb->users);
844 		ireq->pktopts = skb;
845 	}
846 }
847 
848 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
849 					  struct flowi *fl,
850 					  const struct request_sock *req)
851 {
852 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
853 }
854 
855 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
856 	.family		=	AF_INET6,
857 	.obj_size	=	sizeof(struct tcp6_request_sock),
858 	.rtx_syn_ack	=	tcp_rtx_synack,
859 	.send_ack	=	tcp_v6_reqsk_send_ack,
860 	.destructor	=	tcp_v6_reqsk_destructor,
861 	.send_reset	=	tcp_v6_send_reset,
862 	.syn_ack_timeout =	tcp_syn_ack_timeout,
863 };
864 
865 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
866 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
867 				sizeof(struct ipv6hdr),
868 #ifdef CONFIG_TCP_MD5SIG
869 	.req_md5_lookup	=	tcp_v6_md5_lookup,
870 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
871 #endif
872 	.init_req	=	tcp_v6_init_req,
873 #ifdef CONFIG_SYN_COOKIES
874 	.cookie_init_seq =	cookie_v6_init_sequence,
875 #endif
876 	.route_req	=	tcp_v6_route_req,
877 	.init_seq	=	tcp_v6_init_seq,
878 	.init_ts_off	=	tcp_v6_init_ts_off,
879 	.send_synack	=	tcp_v6_send_synack,
880 };
881 
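/* Build and send a bare ACK or RST in reply to @skb: the addresses and
 * ports are swapped from the incoming segment, timestamp and MD5 options
 * are appended when requested, and the reply is transmitted through the
 * per-netns IPv6 TCP control socket.
 */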
882 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
883 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
884 				 int oif, struct tcp_md5sig_key *key, int rst,
885 				 u8 tclass, __be32 label, u32 priority)
886 {
887 	const struct tcphdr *th = tcp_hdr(skb);
888 	struct tcphdr *t1;
889 	struct sk_buff *buff;
890 	struct flowi6 fl6;
891 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
892 	struct sock *ctl_sk = net->ipv6.tcp_sk;
893 	unsigned int tot_len = sizeof(struct tcphdr);
894 	struct dst_entry *dst;
895 	__be32 *topt;
896 	__u32 mark = 0;
897 
898 	if (tsecr)
899 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
900 #ifdef CONFIG_TCP_MD5SIG
901 	if (key)
902 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
903 #endif
904 
905 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
906 			 GFP_ATOMIC);
907 	if (!buff)
908 		return;
909 
910 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
911 
912 	t1 = skb_push(buff, tot_len);
913 	skb_reset_transport_header(buff);
914 
915 	/* Swap the send and the receive. */
916 	memset(t1, 0, sizeof(*t1));
917 	t1->dest = th->source;
918 	t1->source = th->dest;
919 	t1->doff = tot_len / 4;
920 	t1->seq = htonl(seq);
921 	t1->ack_seq = htonl(ack);
922 	t1->ack = !rst || !th->ack;
923 	t1->rst = rst;
924 	t1->window = htons(win);
925 
926 	topt = (__be32 *)(t1 + 1);
927 
928 	if (tsecr) {
929 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
930 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
931 		*topt++ = htonl(tsval);
932 		*topt++ = htonl(tsecr);
933 	}
934 
935 #ifdef CONFIG_TCP_MD5SIG
936 	if (key) {
937 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
938 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
939 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
940 				    &ipv6_hdr(skb)->saddr,
941 				    &ipv6_hdr(skb)->daddr, t1);
942 	}
943 #endif
944 
945 	memset(&fl6, 0, sizeof(fl6));
946 	fl6.daddr = ipv6_hdr(skb)->saddr;
947 	fl6.saddr = ipv6_hdr(skb)->daddr;
948 	fl6.flowlabel = label;
949 
950 	buff->ip_summed = CHECKSUM_PARTIAL;
951 	buff->csum = 0;
952 
953 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
954 
955 	fl6.flowi6_proto = IPPROTO_TCP;
956 	if (rt6_need_strict(&fl6.daddr) && !oif)
957 		fl6.flowi6_oif = tcp_v6_iif(skb);
958 	else {
959 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
960 			oif = skb->skb_iif;
961 
962 		fl6.flowi6_oif = oif;
963 	}
964 
965 	if (sk) {
966 		if (sk->sk_state == TCP_TIME_WAIT) {
967 			mark = inet_twsk(sk)->tw_mark;
968 			/* autoflowlabel relies on buff->hash */
969 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
970 				     PKT_HASH_TYPE_L4);
971 		} else {
972 			mark = sk->sk_mark;
973 		}
974 		buff->tstamp = tcp_transmit_time(sk);
975 	}
976 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
977 	fl6.fl6_dport = t1->dest;
978 	fl6.fl6_sport = t1->source;
979 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
980 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
981 
982 	/* Pass a socket to ip6_dst_lookup_flow() even when the reply is a RST;
983 	 * the underlying function uses it to retrieve the network
984 	 * namespace.
985 	 */
986 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
987 	if (!IS_ERR(dst)) {
988 		skb_dst_set(buff, dst);
989 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
990 			 tclass & ~INET_ECN_MASK, priority);
991 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
992 		if (rst)
993 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
994 		return;
995 	}
996 
997 	kfree_skb(buff);
998 }
999 
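/* Send a RST in reply to @skb.  Nothing is ever sent in response to a RST,
 * and when the incoming segment carries an MD5 option without a matching
 * established socket, the reset is only generated if a listener with a
 * verified key can be found.
 */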
1000 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1001 {
1002 	const struct tcphdr *th = tcp_hdr(skb);
1003 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1004 	u32 seq = 0, ack_seq = 0;
1005 	struct tcp_md5sig_key *key = NULL;
1006 #ifdef CONFIG_TCP_MD5SIG
1007 	const __u8 *hash_location = NULL;
1008 	unsigned char newhash[16];
1009 	int genhash;
1010 	struct sock *sk1 = NULL;
1011 #endif
1012 	__be32 label = 0;
1013 	u32 priority = 0;
1014 	struct net *net;
1015 	int oif = 0;
1016 
1017 	if (th->rst)
1018 		return;
1019 
1020 	/* If sk is not NULL, it means we did a successful lookup and the incoming
1021 	 * route had to be correct. prequeue might have dropped our dst.
1022 	 */
1023 	if (!sk && !ipv6_unicast_destination(skb))
1024 		return;
1025 
1026 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1027 #ifdef CONFIG_TCP_MD5SIG
1028 	rcu_read_lock();
1029 	hash_location = tcp_parse_md5sig_option(th);
1030 	if (sk && sk_fullsock(sk)) {
1031 		int l3index;
1032 
1033 		/* sdif set, means packet ingressed via a device
1034 		 * in an L3 domain and inet_iif is set to it.
1035 		 */
1036 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1037 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1038 	} else if (hash_location) {
1039 		int dif = tcp_v6_iif_l3_slave(skb);
1040 		int sdif = tcp_v6_sdif(skb);
1041 		int l3index;
1042 
1043 		/*
1044 		 * The active side is gone. Try to find the listening socket via the
1045 		 * source port, and then find the md5 key through that socket.
1046 		 * We do not lose security here:
1047 		 * the incoming packet is checked against the md5 hash of the key we
1048 		 * find, and no RST is generated if the hash doesn't match.
1049 		 */
1050 		sk1 = inet6_lookup_listener(net,
1051 					   &tcp_hashinfo, NULL, 0,
1052 					   &ipv6h->saddr,
1053 					   th->source, &ipv6h->daddr,
1054 					   ntohs(th->source), dif, sdif);
1055 		if (!sk1)
1056 			goto out;
1057 
1058 		/* sdif set, means packet ingressed via a device
1059 		 * in an L3 domain and dif is set to it.
1060 		 */
1061 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1062 
1063 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1064 		if (!key)
1065 			goto out;
1066 
1067 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1068 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1069 			goto out;
1070 	}
1071 #endif
1072 
1073 	if (th->ack)
1074 		seq = ntohl(th->ack_seq);
1075 	else
1076 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1077 			  (th->doff << 2);
1078 
1079 	if (sk) {
1080 		oif = sk->sk_bound_dev_if;
1081 		if (sk_fullsock(sk)) {
1082 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1083 
1084 			trace_tcp_send_reset(sk, skb);
1085 			if (np->repflow)
1086 				label = ip6_flowlabel(ipv6h);
1087 			priority = sk->sk_priority;
1088 		}
1089 		if (sk->sk_state == TCP_TIME_WAIT) {
1090 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1091 			priority = inet_twsk(sk)->tw_priority;
1092 		}
1093 	} else {
1094 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1095 			label = ip6_flowlabel(ipv6h);
1096 	}
1097 
1098 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1099 			     ipv6_get_dsfield(ipv6h), label, priority);
1100 
1101 #ifdef CONFIG_TCP_MD5SIG
1102 out:
1103 	rcu_read_unlock();
1104 #endif
1105 }
1106 
1107 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1108 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1109 			    struct tcp_md5sig_key *key, u8 tclass,
1110 			    __be32 label, u32 priority)
1111 {
1112 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1113 			     tclass, label, priority);
1114 }
1115 
1116 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1117 {
1118 	struct inet_timewait_sock *tw = inet_twsk(sk);
1119 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1120 
1121 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1122 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1123 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1124 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1125 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1126 
1127 	inet_twsk_put(tw);
1128 }
1129 
1130 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1131 				  struct request_sock *req)
1132 {
1133 	int l3index;
1134 
1135 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1136 
1137 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1138 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1139 	 */
1140 	/* RFC 7323 2.3
1141 	 * The window field (SEG.WND) of every outgoing segment, with the
1142 	 * exception of <SYN> segments, MUST be right-shifted by
1143 	 * Rcv.Wind.Shift bits:
1144 	 */
1145 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1146 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1147 			tcp_rsk(req)->rcv_nxt,
1148 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1149 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1150 			req->ts_recent, sk->sk_bound_dev_if,
1151 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1152 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1153 }
1154 
1155 
1156 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1157 {
1158 #ifdef CONFIG_SYN_COOKIES
1159 	const struct tcphdr *th = tcp_hdr(skb);
1160 
1161 	if (!th->syn)
1162 		sk = cookie_v6_check(sk, skb);
1163 #endif
1164 	return sk;
1165 }
1166 
1167 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1168 			 struct tcphdr *th, u32 *cookie)
1169 {
1170 	u16 mss = 0;
1171 #ifdef CONFIG_SYN_COOKIES
1172 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1173 				    &tcp_request_sock_ipv6_ops, sk, th);
1174 	if (mss) {
1175 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1176 		tcp_synq_overflow(sk);
1177 	}
1178 #endif
1179 	return mss;
1180 }
1181 
1182 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1183 {
1184 	if (skb->protocol == htons(ETH_P_IP))
1185 		return tcp_v4_conn_request(sk, skb);
1186 
1187 	if (!ipv6_unicast_destination(skb))
1188 		goto drop;
1189 
1190 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1191 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1192 		return 0;
1193 	}
1194 
1195 	return tcp_conn_request(&tcp6_request_sock_ops,
1196 				&tcp_request_sock_ipv6_ops, sk, skb);
1197 
1198 drop:
1199 	tcp_listendrop(sk);
1200 	return 0; /* don't send reset */
1201 }
1202 
1203 static void tcp_v6_restore_cb(struct sk_buff *skb)
1204 {
1205 	/* We need to move header back to the beginning if xfrm6_policy_check()
1206 	 * and tcp_v6_fill_cb() are going to be called again.
1207 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1208 	 */
1209 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1210 		sizeof(struct inet6_skb_parm));
1211 }
1212 
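/* Create the child socket once the handshake completes.  When the SYN
 * arrived over IPv4 (ETH_P_IP, i.e. a v4-mapped peer) the work is delegated
 * to tcp_v4_syn_recv_sock() and the child is switched to the ipv6_mapped
 * af_ops; otherwise the IPv6 options, route and (optional) MD5 key are
 * copied over from the request and the listener.
 */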
1213 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1214 					 struct request_sock *req,
1215 					 struct dst_entry *dst,
1216 					 struct request_sock *req_unhash,
1217 					 bool *own_req)
1218 {
1219 	struct inet_request_sock *ireq;
1220 	struct ipv6_pinfo *newnp;
1221 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1222 	struct ipv6_txoptions *opt;
1223 	struct inet_sock *newinet;
1224 	bool found_dup_sk = false;
1225 	struct tcp_sock *newtp;
1226 	struct sock *newsk;
1227 #ifdef CONFIG_TCP_MD5SIG
1228 	struct tcp_md5sig_key *key;
1229 	int l3index;
1230 #endif
1231 	struct flowi6 fl6;
1232 
1233 	if (skb->protocol == htons(ETH_P_IP)) {
1234 		/*
1235 		 *	v6 mapped
1236 		 */
1237 
1238 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1239 					     req_unhash, own_req);
1240 
1241 		if (!newsk)
1242 			return NULL;
1243 
1244 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1245 
1246 		newinet = inet_sk(newsk);
1247 		newnp = tcp_inet6_sk(newsk);
1248 		newtp = tcp_sk(newsk);
1249 
1250 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1251 
1252 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1253 
1254 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1255 		if (sk_is_mptcp(newsk))
1256 			mptcpv6_handle_mapped(newsk, true);
1257 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1258 #ifdef CONFIG_TCP_MD5SIG
1259 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1260 #endif
1261 
1262 		newnp->ipv6_mc_list = NULL;
1263 		newnp->ipv6_ac_list = NULL;
1264 		newnp->ipv6_fl_list = NULL;
1265 		newnp->pktoptions  = NULL;
1266 		newnp->opt	   = NULL;
1267 		newnp->mcast_oif   = inet_iif(skb);
1268 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1269 		newnp->rcv_flowinfo = 0;
1270 		if (np->repflow)
1271 			newnp->flow_label = 0;
1272 
1273 		/*
1274 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1275 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1276 		 * that function for the gory details. -acme
1277 		 */
1278 
1279 		/* It is a tricky place. Until this moment the IPv4 tcp socket
1280 		   worked with the IPv6 icsk.icsk_af_ops.
1281 		   Sync it now.
1282 		 */
1283 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1284 
1285 		return newsk;
1286 	}
1287 
1288 	ireq = inet_rsk(req);
1289 
1290 	if (sk_acceptq_is_full(sk))
1291 		goto out_overflow;
1292 
1293 	if (!dst) {
1294 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1295 		if (!dst)
1296 			goto out;
1297 	}
1298 
1299 	newsk = tcp_create_openreq_child(sk, req, skb);
1300 	if (!newsk)
1301 		goto out_nonewsk;
1302 
1303 	/*
1304 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1305 	 * count here, tcp_create_openreq_child now does this for us, see the
1306 	 * comment in that function for the gory details. -acme
1307 	 */
1308 
1309 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1310 	ip6_dst_store(newsk, dst, NULL, NULL);
1311 	inet6_sk_rx_dst_set(newsk, skb);
1312 
1313 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1314 
1315 	newtp = tcp_sk(newsk);
1316 	newinet = inet_sk(newsk);
1317 	newnp = tcp_inet6_sk(newsk);
1318 
1319 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1320 
1321 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1322 	newnp->saddr = ireq->ir_v6_loc_addr;
1323 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1324 	newsk->sk_bound_dev_if = ireq->ir_iif;
1325 
1326 	/* Now IPv6 options...
1327 
1328 	   First: no IPv4 options.
1329 	 */
1330 	newinet->inet_opt = NULL;
1331 	newnp->ipv6_mc_list = NULL;
1332 	newnp->ipv6_ac_list = NULL;
1333 	newnp->ipv6_fl_list = NULL;
1334 
1335 	/* Clone RX bits */
1336 	newnp->rxopt.all = np->rxopt.all;
1337 
1338 	newnp->pktoptions = NULL;
1339 	newnp->opt	  = NULL;
1340 	newnp->mcast_oif  = tcp_v6_iif(skb);
1341 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1342 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1343 	if (np->repflow)
1344 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1345 
1346 	/* Set ToS of the new socket based upon the value of incoming SYN.
1347 	 * ECT bits are set later in tcp_init_transfer().
1348 	 */
1349 	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1350 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1351 
1352 	/* Clone native IPv6 options from listening socket (if any)
1353 
1354 	   Yes, keeping a reference count would be much more clever,
1355 	   but we do one more thing here: reattach optmem
1356 	   to newsk.
1357 	 */
1358 	opt = ireq->ipv6_opt;
1359 	if (!opt)
1360 		opt = rcu_dereference(np->opt);
1361 	if (opt) {
1362 		opt = ipv6_dup_options(newsk, opt);
1363 		RCU_INIT_POINTER(newnp->opt, opt);
1364 	}
1365 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1366 	if (opt)
1367 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1368 						    opt->opt_flen;
1369 
1370 	tcp_ca_openreq_child(newsk, dst);
1371 
1372 	tcp_sync_mss(newsk, dst_mtu(dst));
1373 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1374 
1375 	tcp_initialize_rcv_mss(newsk);
1376 
1377 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1378 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1379 
1380 #ifdef CONFIG_TCP_MD5SIG
1381 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1382 
1383 	/* Copy over the MD5 key from the original socket */
1384 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1385 	if (key) {
1386 		/* We're using one, so create a matching key
1387 		 * on the newsk structure. If we fail to get
1388 		 * memory, then we end up not copying the key
1389 		 * across. Shucks.
1390 		 */
1391 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1392 			       AF_INET6, 128, l3index, key->key, key->keylen,
1393 			       sk_gfp_mask(sk, GFP_ATOMIC));
1394 	}
1395 #endif
1396 
1397 	if (__inet_inherit_port(sk, newsk) < 0) {
1398 		inet_csk_prepare_forced_close(newsk);
1399 		tcp_done(newsk);
1400 		goto out;
1401 	}
1402 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1403 				       &found_dup_sk);
1404 	if (*own_req) {
1405 		tcp_move_syn(newtp, req);
1406 
1407 		/* Clone pktoptions received with SYN, if we own the req */
1408 		if (ireq->pktopts) {
1409 			newnp->pktoptions = skb_clone(ireq->pktopts,
1410 						      sk_gfp_mask(sk, GFP_ATOMIC));
1411 			consume_skb(ireq->pktopts);
1412 			ireq->pktopts = NULL;
1413 			if (newnp->pktoptions) {
1414 				tcp_v6_restore_cb(newnp->pktoptions);
1415 				skb_set_owner_r(newnp->pktoptions, newsk);
1416 			}
1417 		}
1418 	} else {
1419 		if (!req_unhash && found_dup_sk) {
1420 			/* This code path should only be executed in the
1421 			 * syncookie case
1422 			 */
1423 			bh_unlock_sock(newsk);
1424 			sock_put(newsk);
1425 			newsk = NULL;
1426 		}
1427 	}
1428 
1429 	return newsk;
1430 
1431 out_overflow:
1432 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1433 out_nonewsk:
1434 	dst_release(dst);
1435 out:
1436 	tcp_listendrop(sk);
1437 	return NULL;
1438 }
1439 
1440 /* The socket must have its spinlock held when we get
1441  * here, unless it is a TCP_LISTEN socket.
1442  *
1443  * We have a potential double-lock case here, so even when
1444  * doing backlog processing we use the BH locking scheme.
1445  * This is because we cannot sleep with the original spinlock
1446  * held.
1447  */
1448 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1449 {
1450 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1451 	struct sk_buff *opt_skb = NULL;
1452 	struct tcp_sock *tp;
1453 
1454 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1455 	   goes to the IPv4 receive handler and is backlogged.
1456 	   From backlog it always goes here. Kerboom...
1457 	   Fortunately, tcp_rcv_established and rcv_established
1458 	   handle them correctly, but that is not the case with
1459 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1460 	 */
1461 
1462 	if (skb->protocol == htons(ETH_P_IP))
1463 		return tcp_v4_do_rcv(sk, skb);
1464 
1465 	/*
1466 	 *	socket locking is here for SMP purposes as backlog rcv
1467 	 *	is currently called with bh processing disabled.
1468 	 */
1469 
1470 	/* Do Stevens' IPV6_PKTOPTIONS.
1471 
1472 	   Yes, guys, it is the only place in our code where we
1473 	   may make it without affecting IPv4.
1474 	   The rest of the code is protocol independent,
1475 	   and I do not like the idea of uglifying IPv4.
1476 
1477 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1478 	   looks not very well thought out. For now we latch
1479 	   the options received in the last packet enqueued
1480 	   by tcp. Feel free to propose a better solution.
1481 					       --ANK (980728)
1482 	 */
1483 	if (np->rxopt.all)
1484 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1485 
1486 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1487 		struct dst_entry *dst = sk->sk_rx_dst;
1488 
1489 		sock_rps_save_rxhash(sk, skb);
1490 		sk_mark_napi_id(sk, skb);
1491 		if (dst) {
1492 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1493 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1494 				dst_release(dst);
1495 				sk->sk_rx_dst = NULL;
1496 			}
1497 		}
1498 
1499 		tcp_rcv_established(sk, skb);
1500 		if (opt_skb)
1501 			goto ipv6_pktoptions;
1502 		return 0;
1503 	}
1504 
1505 	if (tcp_checksum_complete(skb))
1506 		goto csum_err;
1507 
1508 	if (sk->sk_state == TCP_LISTEN) {
1509 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1510 
1511 		if (!nsk)
1512 			goto discard;
1513 
1514 		if (nsk != sk) {
1515 			if (tcp_child_process(sk, nsk, skb))
1516 				goto reset;
1517 			if (opt_skb)
1518 				__kfree_skb(opt_skb);
1519 			return 0;
1520 		}
1521 	} else
1522 		sock_rps_save_rxhash(sk, skb);
1523 
1524 	if (tcp_rcv_state_process(sk, skb))
1525 		goto reset;
1526 	if (opt_skb)
1527 		goto ipv6_pktoptions;
1528 	return 0;
1529 
1530 reset:
1531 	tcp_v6_send_reset(sk, skb);
1532 discard:
1533 	if (opt_skb)
1534 		__kfree_skb(opt_skb);
1535 	kfree_skb(skb);
1536 	return 0;
1537 csum_err:
1538 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1539 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1540 	goto discard;
1541 
1542 
1543 ipv6_pktoptions:
1544 	/* You may ask: what is this?
1545 
1546 	   1. skb was enqueued by tcp.
1547 	   2. skb is added to tail of read queue, rather than out of order.
1548 	   3. socket is not in passive state.
1549 	   4. Finally, it really contains options, which user wants to receive.
1550 	 */
1551 	tp = tcp_sk(sk);
1552 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1553 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1554 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1555 			np->mcast_oif = tcp_v6_iif(opt_skb);
1556 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1557 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1558 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1559 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1560 		if (np->repflow)
1561 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1562 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1563 			skb_set_owner_r(opt_skb, sk);
1564 			tcp_v6_restore_cb(opt_skb);
1565 			opt_skb = xchg(&np->pktoptions, opt_skb);
1566 		} else {
1567 			__kfree_skb(opt_skb);
1568 			opt_skb = xchg(&np->pktoptions, NULL);
1569 		}
1570 	}
1571 
1572 	kfree_skb(opt_skb);
1573 	return 0;
1574 }
1575 
1576 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1577 			   const struct tcphdr *th)
1578 {
1579 	/* This is tricky: we move IP6CB at its correct location into
1580 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1581 	 * _decode_session6() uses IP6CB().
1582 	 * barrier() makes sure compiler won't play aliasing games.
1583 	 */
1584 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1585 		sizeof(struct inet6_skb_parm));
1586 	barrier();
1587 
1588 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1589 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1590 				    skb->len - th->doff*4);
1591 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1592 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1593 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1594 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1595 	TCP_SKB_CB(skb)->sacked = 0;
1596 	TCP_SKB_CB(skb)->has_rxtstamp =
1597 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1598 }
1599 
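/* Main IPv6 receive entry point: validate the header and checksum, look up
 * the owning socket (with dedicated handling for TCP_NEW_SYN_RECV requests
 * and TIME_WAIT sockets), and then either process the segment directly or
 * queue it on the backlog when the socket is owned by user context.
 */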
1600 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1601 {
1602 	struct sk_buff *skb_to_free;
1603 	int sdif = inet6_sdif(skb);
1604 	int dif = inet6_iif(skb);
1605 	const struct tcphdr *th;
1606 	const struct ipv6hdr *hdr;
1607 	bool refcounted;
1608 	struct sock *sk;
1609 	int ret;
1610 	struct net *net = dev_net(skb->dev);
1611 
1612 	if (skb->pkt_type != PACKET_HOST)
1613 		goto discard_it;
1614 
1615 	/*
1616 	 *	Count it even if it's bad.
1617 	 */
1618 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1619 
1620 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1621 		goto discard_it;
1622 
1623 	th = (const struct tcphdr *)skb->data;
1624 
1625 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1626 		goto bad_packet;
1627 	if (!pskb_may_pull(skb, th->doff*4))
1628 		goto discard_it;
1629 
1630 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1631 		goto csum_error;
1632 
1633 	th = (const struct tcphdr *)skb->data;
1634 	hdr = ipv6_hdr(skb);
1635 
1636 lookup:
1637 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1638 				th->source, th->dest, inet6_iif(skb), sdif,
1639 				&refcounted);
1640 	if (!sk)
1641 		goto no_tcp_socket;
1642 
1643 process:
1644 	if (sk->sk_state == TCP_TIME_WAIT)
1645 		goto do_time_wait;
1646 
1647 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1648 		struct request_sock *req = inet_reqsk(sk);
1649 		bool req_stolen = false;
1650 		struct sock *nsk;
1651 
1652 		sk = req->rsk_listener;
1653 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1654 			sk_drops_add(sk, skb);
1655 			reqsk_put(req);
1656 			goto discard_it;
1657 		}
1658 		if (tcp_checksum_complete(skb)) {
1659 			reqsk_put(req);
1660 			goto csum_error;
1661 		}
1662 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1664 			goto lookup;
1665 		}
1666 		sock_hold(sk);
1667 		refcounted = true;
1668 		nsk = NULL;
1669 		if (!tcp_filter(sk, skb)) {
1670 			th = (const struct tcphdr *)skb->data;
1671 			hdr = ipv6_hdr(skb);
1672 			tcp_v6_fill_cb(skb, hdr, th);
1673 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1674 		}
1675 		if (!nsk) {
1676 			reqsk_put(req);
1677 			if (req_stolen) {
1678 				/* Another cpu got exclusive access to req
1679 				 * and created a full blown socket.
1680 				 * Try to feed this packet to this socket
1681 				 * instead of discarding it.
1682 				 */
1683 				tcp_v6_restore_cb(skb);
1684 				sock_put(sk);
1685 				goto lookup;
1686 			}
1687 			goto discard_and_relse;
1688 		}
1689 		if (nsk == sk) {
1690 			reqsk_put(req);
1691 			tcp_v6_restore_cb(skb);
1692 		} else if (tcp_child_process(sk, nsk, skb)) {
1693 			tcp_v6_send_reset(nsk, skb);
1694 			goto discard_and_relse;
1695 		} else {
1696 			sock_put(sk);
1697 			return 0;
1698 		}
1699 	}
1700 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1701 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1702 		goto discard_and_relse;
1703 	}
1704 
1705 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1706 		goto discard_and_relse;
1707 
1708 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1709 		goto discard_and_relse;
1710 
1711 	if (tcp_filter(sk, skb))
1712 		goto discard_and_relse;
1713 	th = (const struct tcphdr *)skb->data;
1714 	hdr = ipv6_hdr(skb);
1715 	tcp_v6_fill_cb(skb, hdr, th);
1716 
1717 	skb->dev = NULL;
1718 
1719 	if (sk->sk_state == TCP_LISTEN) {
1720 		ret = tcp_v6_do_rcv(sk, skb);
1721 		goto put_and_return;
1722 	}
1723 
1724 	sk_incoming_cpu_update(sk);
1725 
1726 	bh_lock_sock_nested(sk);
1727 	tcp_segs_in(tcp_sk(sk), skb);
1728 	ret = 0;
1729 	if (!sock_owned_by_user(sk)) {
1730 		skb_to_free = sk->sk_rx_skb_cache;
1731 		sk->sk_rx_skb_cache = NULL;
1732 		ret = tcp_v6_do_rcv(sk, skb);
1733 	} else {
1734 		if (tcp_add_backlog(sk, skb))
1735 			goto discard_and_relse;
1736 		skb_to_free = NULL;
1737 	}
1738 	bh_unlock_sock(sk);
1739 	if (skb_to_free)
1740 		__kfree_skb(skb_to_free);
1741 put_and_return:
1742 	if (refcounted)
1743 		sock_put(sk);
1744 	return ret ? -1 : 0;
1745 
1746 no_tcp_socket:
1747 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1748 		goto discard_it;
1749 
1750 	tcp_v6_fill_cb(skb, hdr, th);
1751 
1752 	if (tcp_checksum_complete(skb)) {
1753 csum_error:
1754 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1755 bad_packet:
1756 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1757 	} else {
1758 		tcp_v6_send_reset(NULL, skb);
1759 	}
1760 
1761 discard_it:
1762 	kfree_skb(skb);
1763 	return 0;
1764 
1765 discard_and_relse:
1766 	sk_drops_add(sk, skb);
1767 	if (refcounted)
1768 		sock_put(sk);
1769 	goto discard_it;
1770 
1771 do_time_wait:
1772 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1773 		inet_twsk_put(inet_twsk(sk));
1774 		goto discard_it;
1775 	}
1776 
1777 	tcp_v6_fill_cb(skb, hdr, th);
1778 
1779 	if (tcp_checksum_complete(skb)) {
1780 		inet_twsk_put(inet_twsk(sk));
1781 		goto csum_error;
1782 	}
1783 
1784 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1785 	case TCP_TW_SYN:
1786 	{
1787 		struct sock *sk2;
1788 
1789 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1790 					    skb, __tcp_hdrlen(th),
1791 					    &ipv6_hdr(skb)->saddr, th->source,
1792 					    &ipv6_hdr(skb)->daddr,
1793 					    ntohs(th->dest),
1794 					    tcp_v6_iif_l3_slave(skb),
1795 					    sdif);
1796 		if (sk2) {
1797 			struct inet_timewait_sock *tw = inet_twsk(sk);
1798 			inet_twsk_deschedule_put(tw);
1799 			sk = sk2;
1800 			tcp_v6_restore_cb(skb);
1801 			refcounted = false;
1802 			goto process;
1803 		}
1804 	}
1805 		/* to ACK */
1806 		fallthrough;
1807 	case TCP_TW_ACK:
1808 		tcp_v6_timewait_ack(sk, skb);
1809 		break;
1810 	case TCP_TW_RST:
1811 		tcp_v6_send_reset(sk, skb);
1812 		inet_twsk_deschedule_put(inet_twsk(sk));
1813 		goto discard_it;
1814 	case TCP_TW_SUCCESS:
1815 		;
1816 	}
1817 	goto discard_it;
1818 }
1819 
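/* Early demux: called before routing to look up an established socket,
 * so that a dst cached on that socket can be attached to the skb and
 * a full route lookup avoided.
 */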
1820 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1821 {
1822 	const struct ipv6hdr *hdr;
1823 	const struct tcphdr *th;
1824 	struct sock *sk;
1825 
1826 	if (skb->pkt_type != PACKET_HOST)
1827 		return;
1828 
1829 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1830 		return;
1831 
1832 	hdr = ipv6_hdr(skb);
1833 	th = tcp_hdr(skb);
1834 
1835 	if (th->doff < sizeof(struct tcphdr) / 4)
1836 		return;
1837 
1838 	/* Note: We use inet6_iif() here, not tcp_v6_iif() */
1839 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1840 					&hdr->saddr, th->source,
1841 					&hdr->daddr, ntohs(th->dest),
1842 					inet6_iif(skb), inet6_sdif(skb));
1843 	if (sk) {
1844 		skb->sk = sk;
1845 		skb->destructor = sock_edemux;
1846 		if (sk_fullsock(sk)) {
1847 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1848 
1849 			if (dst)
1850 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1851 			if (dst &&
1852 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1853 				skb_dst_set_noref(skb, dst);
1854 		}
1855 	}
1856 }
1857 
1858 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1859 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1860 	.twsk_unique	= tcp_twsk_unique,
1861 	.twsk_destructor = tcp_twsk_destructor,
1862 };
1863 
1864 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1865 {
1866 	struct ipv6_pinfo *np = inet6_sk(sk);
1867 
1868 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1869 }
1870 
1871 const struct inet_connection_sock_af_ops ipv6_specific = {
1872 	.queue_xmit	   = inet6_csk_xmit,
1873 	.send_check	   = tcp_v6_send_check,
1874 	.rebuild_header	   = inet6_sk_rebuild_header,
1875 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1876 	.conn_request	   = tcp_v6_conn_request,
1877 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1878 	.net_header_len	   = sizeof(struct ipv6hdr),
1879 	.net_frag_header_len = sizeof(struct frag_hdr),
1880 	.setsockopt	   = ipv6_setsockopt,
1881 	.getsockopt	   = ipv6_getsockopt,
1882 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1883 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1884 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1885 };
1886 
1887 #ifdef CONFIG_TCP_MD5SIG
1888 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1889 	.md5_lookup	=	tcp_v6_md5_lookup,
1890 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1891 	.md5_parse	=	tcp_v6_parse_md5_keys,
1892 };
1893 #endif
1894 
1895 /*
1896  *	TCP over IPv4 via INET6 API
1897  */
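/* These ops replace ipv6_specific on an AF_INET6 socket once it is
 * connected to, or accepts from, an IPv4-mapped IPv6 address.
 */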
1898 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1899 	.queue_xmit	   = ip_queue_xmit,
1900 	.send_check	   = tcp_v4_send_check,
1901 	.rebuild_header	   = inet_sk_rebuild_header,
1902 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1903 	.conn_request	   = tcp_v6_conn_request,
1904 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1905 	.net_header_len	   = sizeof(struct iphdr),
1906 	.setsockopt	   = ipv6_setsockopt,
1907 	.getsockopt	   = ipv6_getsockopt,
1908 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1909 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1910 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1911 };
1912 
1913 #ifdef CONFIG_TCP_MD5SIG
1914 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1915 	.md5_lookup	=	tcp_v4_md5_lookup,
1916 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1917 	.md5_parse	=	tcp_v6_parse_md5_keys,
1918 };
1919 #endif
1920 
1921 /* NOTE: A lot of things are set to zero explicitly by the call to
1922  *       sk_alloc(), so they need not be done here.
1923  */
1924 static int tcp_v6_init_sock(struct sock *sk)
1925 {
1926 	struct inet_connection_sock *icsk = inet_csk(sk);
1927 
1928 	tcp_init_sock(sk);
1929 
1930 	icsk->icsk_af_ops = &ipv6_specific;
1931 
1932 #ifdef CONFIG_TCP_MD5SIG
1933 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1934 #endif
1935 
1936 	return 0;
1937 }
1938 
1939 static void tcp_v6_destroy_sock(struct sock *sk)
1940 {
1941 	tcp_v4_destroy_sock(sk);
1942 	inet6_destroy_sock(sk);
1943 }
1944 
1945 #ifdef CONFIG_PROC_FS
1946 /* Proc filesystem TCPv6 sock list dumping. */
1947 static void get_openreq6(struct seq_file *seq,
1948 			 const struct request_sock *req, int i)
1949 {
1950 	long ttd = req->rsk_timer.expires - jiffies;
1951 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1952 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1953 
1954 	if (ttd < 0)
1955 		ttd = 0;
1956 
1957 	seq_printf(seq,
1958 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1959 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1960 		   i,
1961 		   src->s6_addr32[0], src->s6_addr32[1],
1962 		   src->s6_addr32[2], src->s6_addr32[3],
1963 		   inet_rsk(req)->ir_num,
1964 		   dest->s6_addr32[0], dest->s6_addr32[1],
1965 		   dest->s6_addr32[2], dest->s6_addr32[3],
1966 		   ntohs(inet_rsk(req)->ir_rmt_port),
1967 		   TCP_SYN_RECV,
1968 		   0, 0, /* could print option size, but that is af dependent. */
1969 		   1,   /* timers active (only the expire timer) */
1970 		   jiffies_to_clock_t(ttd),
1971 		   req->num_timeout,
1972 		   from_kuid_munged(seq_user_ns(seq),
1973 				    sock_i_uid(req->rsk_listener)),
1974 		   0,  /* non standard timer */
1975 		   0, /* open_requests have no inode */
1976 		   0, req);
1977 }
1978 
1979 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1980 {
1981 	const struct in6_addr *dest, *src;
1982 	__u16 destp, srcp;
1983 	int timer_active;
1984 	unsigned long timer_expires;
1985 	const struct inet_sock *inet = inet_sk(sp);
1986 	const struct tcp_sock *tp = tcp_sk(sp);
1987 	const struct inet_connection_sock *icsk = inet_csk(sp);
1988 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1989 	int rx_queue;
1990 	int state;
1991 
1992 	dest  = &sp->sk_v6_daddr;
1993 	src   = &sp->sk_v6_rcv_saddr;
1994 	destp = ntohs(inet->inet_dport);
1995 	srcp  = ntohs(inet->inet_sport);
1996 
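	/* Timer codes reported in the tr column of /proc/net/tcp6:
	 * 1 retransmit/loss probe, 2 keepalive, 4 zero-window probe,
	 * 0 none (3 is used for TIME_WAIT sockets in get_timewait6_sock()).
	 */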
1997 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1998 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1999 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2000 		timer_active	= 1;
2001 		timer_expires	= icsk->icsk_timeout;
2002 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2003 		timer_active	= 4;
2004 		timer_expires	= icsk->icsk_timeout;
2005 	} else if (timer_pending(&sp->sk_timer)) {
2006 		timer_active	= 2;
2007 		timer_expires	= sp->sk_timer.expires;
2008 	} else {
2009 		timer_active	= 0;
2010 		timer_expires = jiffies;
2011 	}
2012 
2013 	state = inet_sk_state_load(sp);
2014 	if (state == TCP_LISTEN)
2015 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2016 	else
2017 		/* Because we don't lock the socket,
2018 		 * we might find a transient negative value.
2019 		 */
2020 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2021 				      READ_ONCE(tp->copied_seq), 0);
2022 
2023 	seq_printf(seq,
2024 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2025 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2026 		   i,
2027 		   src->s6_addr32[0], src->s6_addr32[1],
2028 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2029 		   dest->s6_addr32[0], dest->s6_addr32[1],
2030 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2031 		   state,
2032 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2033 		   rx_queue,
2034 		   timer_active,
2035 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2036 		   icsk->icsk_retransmits,
2037 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2038 		   icsk->icsk_probes_out,
2039 		   sock_i_ino(sp),
2040 		   refcount_read(&sp->sk_refcnt), sp,
2041 		   jiffies_to_clock_t(icsk->icsk_rto),
2042 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2043 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2044 		   tp->snd_cwnd,
2045 		   state == TCP_LISTEN ?
2046 			fastopenq->max_qlen :
2047 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2048 		   );
2049 }
2050 
2051 static void get_timewait6_sock(struct seq_file *seq,
2052 			       struct inet_timewait_sock *tw, int i)
2053 {
2054 	long delta = tw->tw_timer.expires - jiffies;
2055 	const struct in6_addr *dest, *src;
2056 	__u16 destp, srcp;
2057 
2058 	dest = &tw->tw_v6_daddr;
2059 	src  = &tw->tw_v6_rcv_saddr;
2060 	destp = ntohs(tw->tw_dport);
2061 	srcp  = ntohs(tw->tw_sport);
2062 
2063 	seq_printf(seq,
2064 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2065 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2066 		   i,
2067 		   src->s6_addr32[0], src->s6_addr32[1],
2068 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2069 		   dest->s6_addr32[0], dest->s6_addr32[1],
2070 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2071 		   tw->tw_substate, 0, 0,
2072 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2073 		   refcount_read(&tw->tw_refcnt), tw);
2074 }
2075 
2076 static int tcp6_seq_show(struct seq_file *seq, void *v)
2077 {
2078 	struct tcp_iter_state *st;
2079 	struct sock *sk = v;
2080 
2081 	if (v == SEQ_START_TOKEN) {
2082 		seq_puts(seq,
2083 			 "  sl  "
2084 			 "local_address                         "
2085 			 "remote_address                        "
2086 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2087 			 "   uid  timeout inode\n");
2088 		goto out;
2089 	}
2090 	st = seq->private;
2091 
2092 	if (sk->sk_state == TCP_TIME_WAIT)
2093 		get_timewait6_sock(seq, v, st->num);
2094 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2095 		get_openreq6(seq, v, st->num);
2096 	else
2097 		get_tcp6_sock(seq, v, st->num);
2098 out:
2099 	return 0;
2100 }
2101 
2102 static const struct seq_operations tcp6_seq_ops = {
2103 	.show		= tcp6_seq_show,
2104 	.start		= tcp_seq_start,
2105 	.next		= tcp_seq_next,
2106 	.stop		= tcp_seq_stop,
2107 };
2108 
2109 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2110 	.family		= AF_INET6,
2111 };
2112 
2113 int __net_init tcp6_proc_init(struct net *net)
2114 {
2115 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2116 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2117 		return -ENOMEM;
2118 	return 0;
2119 }
2120 
2121 void tcp6_proc_exit(struct net *net)
2122 {
2123 	remove_proc_entry("tcp6", net->proc_net);
2124 }
2125 #endif
2126 
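/* Method table used for every AF_INET6 SOCK_STREAM socket; userspace
 * reaches it through socket(AF_INET6, SOCK_STREAM, 0) once tcpv6_protosw
 * (below) has been registered.
 */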
2127 struct proto tcpv6_prot = {
2128 	.name			= "TCPv6",
2129 	.owner			= THIS_MODULE,
2130 	.close			= tcp_close,
2131 	.pre_connect		= tcp_v6_pre_connect,
2132 	.connect		= tcp_v6_connect,
2133 	.disconnect		= tcp_disconnect,
2134 	.accept			= inet_csk_accept,
2135 	.ioctl			= tcp_ioctl,
2136 	.init			= tcp_v6_init_sock,
2137 	.destroy		= tcp_v6_destroy_sock,
2138 	.shutdown		= tcp_shutdown,
2139 	.setsockopt		= tcp_setsockopt,
2140 	.getsockopt		= tcp_getsockopt,
2141 	.keepalive		= tcp_set_keepalive,
2142 	.recvmsg		= tcp_recvmsg,
2143 	.sendmsg		= tcp_sendmsg,
2144 	.sendpage		= tcp_sendpage,
2145 	.backlog_rcv		= tcp_v6_do_rcv,
2146 	.release_cb		= tcp_release_cb,
2147 	.hash			= inet6_hash,
2148 	.unhash			= inet_unhash,
2149 	.get_port		= inet_csk_get_port,
2150 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2151 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2152 	.stream_memory_free	= tcp_stream_memory_free,
2153 	.sockets_allocated	= &tcp_sockets_allocated,
2154 	.memory_allocated	= &tcp_memory_allocated,
2155 	.memory_pressure	= &tcp_memory_pressure,
2156 	.orphan_count		= &tcp_orphan_count,
2157 	.sysctl_mem		= sysctl_tcp_mem,
2158 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2159 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2160 	.max_header		= MAX_TCP_HEADER,
2161 	.obj_size		= sizeof(struct tcp6_sock),
2162 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2163 	.twsk_prot		= &tcp6_timewait_sock_ops,
2164 	.rsk_prot		= &tcp6_request_sock_ops,
2165 	.h.hashinfo		= &tcp_hashinfo,
2166 	.no_autobind		= true,
2167 	.diag_destroy		= tcp_abort,
2168 };
2169 EXPORT_SYMBOL_GPL(tcpv6_prot);
2170 
2171 /* thinking of making this const? Don't.
2172  * early_demux can change based on sysctl.
2173  */
2174 static struct inet6_protocol tcpv6_protocol = {
2175 	.early_demux	=	tcp_v6_early_demux,
2176 	.early_demux_handler =  tcp_v6_early_demux,
2177 	.handler	=	tcp_v6_rcv,
2178 	.err_handler	=	tcp_v6_err,
2179 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2180 };
2181 
2182 static struct inet_protosw tcpv6_protosw = {
2183 	.type		=	SOCK_STREAM,
2184 	.protocol	=	IPPROTO_TCP,
2185 	.prot		=	&tcpv6_prot,
2186 	.ops		=	&inet6_stream_ops,
2187 	.flags		=	INET_PROTOSW_PERMANENT |
2188 				INET_PROTOSW_ICSK,
2189 };
2190 
2191 static int __net_init tcpv6_net_init(struct net *net)
2192 {
2193 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2194 				    SOCK_RAW, IPPROTO_TCP, net);
2195 }
2196 
2197 static void __net_exit tcpv6_net_exit(struct net *net)
2198 {
2199 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2200 }
2201 
2202 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2203 {
2204 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2205 }
2206 
2207 static struct pernet_operations tcpv6_net_ops = {
2208 	.init	    = tcpv6_net_init,
2209 	.exit	    = tcpv6_net_exit,
2210 	.exit_batch = tcpv6_net_exit_batch,
2211 };
2212 
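/* Register, in order: the IPPROTO_TCP input handler, the SOCK_STREAM
 * protosw entry, the per-netns control socket and MPTCP-over-IPv6
 * support; errors unwind in reverse through the goto chain below.
 */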
2213 int __init tcpv6_init(void)
2214 {
2215 	int ret;
2216 
2217 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2218 	if (ret)
2219 		goto out;
2220 
2221 	/* register inet6 protocol */
2222 	ret = inet6_register_protosw(&tcpv6_protosw);
2223 	if (ret)
2224 		goto out_tcpv6_protocol;
2225 
2226 	ret = register_pernet_subsys(&tcpv6_net_ops);
2227 	if (ret)
2228 		goto out_tcpv6_protosw;
2229 
2230 	ret = mptcpv6_init();
2231 	if (ret)
2232 		goto out_tcpv6_pernet_subsys;
2233 
2234 out:
2235 	return ret;
2236 
2237 out_tcpv6_pernet_subsys:
2238 	unregister_pernet_subsys(&tcpv6_net_ops);
2239 out_tcpv6_protosw:
2240 	inet6_unregister_protosw(&tcpv6_protosw);
2241 out_tcpv6_protocol:
2242 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2243 	goto out;
2244 }
2245 
2246 void tcpv6_exit(void)
2247 {
2248 	unregister_pernet_subsys(&tcpv6_net_ops);
2249 	inet6_unregister_protosw(&tcpv6_protosw);
2250 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2251 }
2252