1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the address of the inet6 (ipv6_pinfo) part of a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
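
/* A rough sketch of the layout this relies on (assuming the usual tcp6_sock
 * definition, where ipv6_pinfo is the trailing member):
 *
 *	struct tcp6_sock {
 *		struct tcp_sock		tcp;	(must come first)
 *		struct ipv6_pinfo	inet6;	(sits at a fixed offset)
 *	};
 *
 * so (u8 *)sk + sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo) points at
 * the inet6 state without dereferencing inet_sk(sk)->pinet6.
 */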
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and is intended to
134 	 * prevent the BPF program called below from accessing bytes that are
135 	 * outside the bound specified by the user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242 		if (sk_is_mptcp(sk))
243 			mptcpv6_handle_mapped(sk, true);
244 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248 
249 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250 
251 		if (err) {
252 			icsk->icsk_ext_hdr_len = exthdrlen;
253 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255 			if (sk_is_mptcp(sk))
256 				mptcpv6_handle_mapped(sk, false);
257 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
258 #ifdef CONFIG_TCP_MD5SIG
259 			tp->af_specific = &tcp_sock_ipv6_specific;
260 #endif
261 			goto failure;
262 		}
263 		np->saddr = sk->sk_v6_rcv_saddr;
264 
265 		return err;
266 	}
267 
268 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 		saddr = &sk->sk_v6_rcv_saddr;
270 
271 	fl6.flowi6_proto = IPPROTO_TCP;
272 	fl6.daddr = sk->sk_v6_daddr;
273 	fl6.saddr = saddr ? *saddr : np->saddr;
274 	fl6.flowi6_oif = sk->sk_bound_dev_if;
275 	fl6.flowi6_mark = sk->sk_mark;
276 	fl6.fl6_dport = usin->sin6_port;
277 	fl6.fl6_sport = inet->inet_sport;
278 	fl6.flowi6_uid = sk->sk_uid;
279 
280 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
281 	final_p = fl6_update_dst(&fl6, opt, &final);
282 
283 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
284 
285 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
286 	if (IS_ERR(dst)) {
287 		err = PTR_ERR(dst);
288 		goto failure;
289 	}
290 
291 	if (!saddr) {
292 		saddr = &fl6.saddr;
293 		sk->sk_v6_rcv_saddr = *saddr;
294 	}
295 
296 	/* set the source address */
297 	np->saddr = *saddr;
298 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
299 
300 	sk->sk_gso_type = SKB_GSO_TCPV6;
301 	ip6_dst_store(sk, dst, NULL, NULL);
302 
303 	icsk->icsk_ext_hdr_len = 0;
304 	if (opt)
305 		icsk->icsk_ext_hdr_len = opt->opt_flen +
306 					 opt->opt_nflen;
307 
308 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
309 
310 	inet->inet_dport = usin->sin6_port;
311 
312 	tcp_set_state(sk, TCP_SYN_SENT);
313 	err = inet6_hash_connect(tcp_death_row, sk);
314 	if (err)
315 		goto late_failure;
316 
317 	sk_set_txhash(sk);
318 
319 	if (likely(!tp->repair)) {
320 		if (!tp->write_seq)
321 			WRITE_ONCE(tp->write_seq,
322 				   secure_tcpv6_seq(np->saddr.s6_addr32,
323 						    sk->sk_v6_daddr.s6_addr32,
324 						    inet->inet_sport,
325 						    inet->inet_dport));
326 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
327 						   np->saddr.s6_addr32,
328 						   sk->sk_v6_daddr.s6_addr32);
329 	}
330 
331 	if (tcp_fastopen_defer_connect(sk, &err))
332 		return err;
333 	if (err)
334 		goto late_failure;
335 
336 	err = tcp_connect(sk);
337 	if (err)
338 		goto late_failure;
339 
340 	return 0;
341 
342 late_failure:
343 	tcp_set_state(sk, TCP_CLOSE);
344 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
345 		inet_reset_saddr(sk);
346 failure:
347 	inet->inet_dport = 0;
348 	sk->sk_route_caps = 0;
349 	return err;
350 }
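
/* A minimal userspace sketch of the path above (illustrative only: the
 * destination address and port are examples, error handling is elided):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 dst = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port   = htons(443),
 *	};
 *
 *	inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));	(ends up in tcp_v6_connect())
 */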
351 
352 static void tcp_v6_mtu_reduced(struct sock *sk)
353 {
354 	struct dst_entry *dst;
355 	u32 mtu;
356 
357 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
358 		return;
359 
360 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
361 
362 	/* Drop requests trying to increase our current mss.
363 	 * The check done in __ip6_rt_update_pmtu() is too late.
364 	 */
365 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
366 		return;
367 
368 	dst = inet6_csk_update_pmtu(sk, mtu);
369 	if (!dst)
370 		return;
371 
372 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
373 		tcp_sync_mss(sk, dst_mtu(dst));
374 		tcp_simple_retransmit(sk);
375 	}
376 }
377 
378 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
379 		u8 type, u8 code, int offset, __be32 info)
380 {
381 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
382 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
383 	struct net *net = dev_net(skb->dev);
384 	struct request_sock *fastopen;
385 	struct ipv6_pinfo *np;
386 	struct tcp_sock *tp;
387 	__u32 seq, snd_una;
388 	struct sock *sk;
389 	bool fatal;
390 	int err;
391 
392 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
393 					&hdr->daddr, th->dest,
394 					&hdr->saddr, ntohs(th->source),
395 					skb->dev->ifindex, inet6_sdif(skb));
396 
397 	if (!sk) {
398 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
399 				  ICMP6_MIB_INERRORS);
400 		return -ENOENT;
401 	}
402 
403 	if (sk->sk_state == TCP_TIME_WAIT) {
404 		inet_twsk_put(inet_twsk(sk));
405 		return 0;
406 	}
407 	seq = ntohl(th->seq);
408 	fatal = icmpv6_err_convert(type, code, &err);
409 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
410 		tcp_req_err(sk, seq, fatal);
411 		return 0;
412 	}
413 
414 	bh_lock_sock(sk);
415 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
416 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
417 
418 	if (sk->sk_state == TCP_CLOSE)
419 		goto out;
420 
421 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
422 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
423 		goto out;
424 	}
425 
426 	tp = tcp_sk(sk);
427 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
428 	fastopen = rcu_dereference(tp->fastopen_rsk);
429 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
430 	if (sk->sk_state != TCP_LISTEN &&
431 	    !between(seq, snd_una, tp->snd_nxt)) {
432 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433 		goto out;
434 	}
435 
436 	np = tcp_inet6_sk(sk);
437 
438 	if (type == NDISC_REDIRECT) {
439 		if (!sock_owned_by_user(sk)) {
440 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441 
442 			if (dst)
443 				dst->ops->redirect(dst, sk, skb);
444 		}
445 		goto out;
446 	}
447 
448 	if (type == ICMPV6_PKT_TOOBIG) {
449 		u32 mtu = ntohl(info);
450 
451 		/* We are not interested in TCP_LISTEN and open_requests
452 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
453 		 * they should go through unfragmented).
454 		 */
455 		if (sk->sk_state == TCP_LISTEN)
456 			goto out;
457 
458 		if (!ip6_sk_accept_pmtu(sk))
459 			goto out;
460 
461 		if (mtu < IPV6_MIN_MTU)
462 			goto out;
463 
464 		WRITE_ONCE(tp->mtu_info, mtu);
465 
466 		if (!sock_owned_by_user(sk))
467 			tcp_v6_mtu_reduced(sk);
468 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469 					   &sk->sk_tsq_flags))
470 			sock_hold(sk);
471 		goto out;
472 	}
473 
474 
475 	/* Might be for a request_sock */
476 	switch (sk->sk_state) {
477 	case TCP_SYN_SENT:
478 	case TCP_SYN_RECV:
479 		/* Only in fast or simultaneous open. If a fast open socket is
480 		 * already accepted it is treated as a connected one below.
481 		 */
482 		if (fastopen && !fastopen->sk)
483 			break;
484 
485 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486 
487 		if (!sock_owned_by_user(sk)) {
488 			sk->sk_err = err;
489 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
490 
491 			tcp_done(sk);
492 		} else
493 			sk->sk_err_soft = err;
494 		goto out;
495 	case TCP_LISTEN:
496 		break;
497 	default:
498 		/* Check whether this ICMP message allows reverting the
499 		 * backoff (see RFC 6069).
500 		 */
501 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502 		    code == ICMPV6_NOROUTE)
503 			tcp_ld_RTO_revert(sk, seq);
504 	}
505 
506 	if (!sock_owned_by_user(sk) && np->recverr) {
507 		sk->sk_err = err;
508 		sk->sk_error_report(sk);
509 	} else
510 		sk->sk_err_soft = err;
511 
512 out:
513 	bh_unlock_sock(sk);
514 	sock_put(sk);
515 	return 0;
516 }
517 
518 
519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520 			      struct flowi *fl,
521 			      struct request_sock *req,
522 			      struct tcp_fastopen_cookie *foc,
523 			      enum tcp_synack_type synack_type,
524 			      struct sk_buff *syn_skb)
525 {
526 	struct inet_request_sock *ireq = inet_rsk(req);
527 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528 	struct ipv6_txoptions *opt;
529 	struct flowi6 *fl6 = &fl->u.ip6;
530 	struct sk_buff *skb;
531 	int err = -ENOMEM;
532 	u8 tclass;
533 
534 	/* First, grab a route. */
535 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536 					       IPPROTO_TCP)) == NULL)
537 		goto done;
538 
539 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540 
541 	if (skb) {
542 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543 				    &ireq->ir_v6_rmt_addr);
544 
545 		fl6->daddr = ireq->ir_v6_rmt_addr;
546 		if (np->repflow && ireq->pktopts)
547 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548 
549 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
550 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551 				(np->tclass & INET_ECN_MASK) :
552 				np->tclass;
553 
554 		if (!INET_ECN_is_capable(tclass) &&
555 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
556 			tclass |= INET_ECN_ECT_0;
557 
558 		rcu_read_lock();
559 		opt = ireq->ipv6_opt;
560 		if (!opt)
561 			opt = rcu_dereference(np->opt);
562 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563 			       tclass, sk->sk_priority);
564 		rcu_read_unlock();
565 		err = net_xmit_eval(err);
566 	}
567 
568 done:
569 	return err;
570 }
571 
572 
573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575 	kfree(inet_rsk(req)->ipv6_opt);
576 	kfree_skb(inet_rsk(req)->pktopts);
577 }
578 
579 #ifdef CONFIG_TCP_MD5SIG
580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581 						   const struct in6_addr *addr,
582 						   int l3index)
583 {
584 	return tcp_md5_do_lookup(sk, l3index,
585 				 (union tcp_md5_addr *)addr, AF_INET6);
586 }
587 
588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589 						const struct sock *addr_sk)
590 {
591 	int l3index;
592 
593 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594 						 addr_sk->sk_bound_dev_if);
595 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596 				    l3index);
597 }
598 
599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600 				 sockptr_t optval, int optlen)
601 {
602 	struct tcp_md5sig cmd;
603 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604 	int l3index = 0;
605 	u8 prefixlen;
606 
607 	if (optlen < sizeof(cmd))
608 		return -EINVAL;
609 
610 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 		return -EFAULT;
612 
613 	if (sin6->sin6_family != AF_INET6)
614 		return -EINVAL;
615 
616 	if (optname == TCP_MD5SIG_EXT &&
617 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
618 		prefixlen = cmd.tcpm_prefixlen;
619 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
620 					prefixlen > 32))
621 			return -EINVAL;
622 	} else {
623 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
624 	}
625 
626 	if (optname == TCP_MD5SIG_EXT &&
627 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
628 		struct net_device *dev;
629 
630 		rcu_read_lock();
631 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
632 		if (dev && netif_is_l3_master(dev))
633 			l3index = dev->ifindex;
634 		rcu_read_unlock();
635 
636 		/* ok to reference set/not set outside of rcu;
637 		 * right now device MUST be an L3 master
638 		 */
639 		if (!dev || !l3index)
640 			return -EINVAL;
641 	}
642 
643 	if (!cmd.tcpm_keylen) {
644 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
645 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
646 					      AF_INET, prefixlen,
647 					      l3index);
648 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
649 				      AF_INET6, prefixlen, l3index);
650 	}
651 
652 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
653 		return -EINVAL;
654 
655 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
656 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
657 				      AF_INET, prefixlen, l3index,
658 				      cmd.tcpm_key, cmd.tcpm_keylen,
659 				      GFP_KERNEL);
660 
661 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
662 			      AF_INET6, prefixlen, l3index,
663 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
664 }
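
/* A minimal userspace sketch of installing a key that this parser consumes
 * (illustrative only: the peer address and key are examples, error handling
 * is elided):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *peer = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	peer->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::2", &peer->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */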
665 
666 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
667 				   const struct in6_addr *daddr,
668 				   const struct in6_addr *saddr,
669 				   const struct tcphdr *th, int nbytes)
670 {
671 	struct tcp6_pseudohdr *bp;
672 	struct scatterlist sg;
673 	struct tcphdr *_th;
674 
675 	bp = hp->scratch;
676 	/* 1. TCP pseudo-header (RFC2460) */
677 	bp->saddr = *saddr;
678 	bp->daddr = *daddr;
679 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
680 	bp->len = cpu_to_be32(nbytes);
681 
682 	_th = (struct tcphdr *)(bp + 1);
683 	memcpy(_th, th, sizeof(*th));
684 	_th->check = 0;
685 
686 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
687 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
688 				sizeof(*bp) + sizeof(*th));
689 	return crypto_ahash_update(hp->md5_req);
690 }
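
/* The scratch buffer above is an RFC 2460-style pseudo-header followed by a
 * copy of the TCP header with its checksum zeroed (assuming the usual
 * tcp6_pseudohdr definition):
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		(TCP header + payload length)
 *		__be32		protocol;	(IPPROTO_TCP, zero padded)
 *	};
 */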
691 
692 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
693 			       const struct in6_addr *daddr, struct in6_addr *saddr,
694 			       const struct tcphdr *th)
695 {
696 	struct tcp_md5sig_pool *hp;
697 	struct ahash_request *req;
698 
699 	hp = tcp_get_md5sig_pool();
700 	if (!hp)
701 		goto clear_hash_noput;
702 	req = hp->md5_req;
703 
704 	if (crypto_ahash_init(req))
705 		goto clear_hash;
706 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
707 		goto clear_hash;
708 	if (tcp_md5_hash_key(hp, key))
709 		goto clear_hash;
710 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
711 	if (crypto_ahash_final(req))
712 		goto clear_hash;
713 
714 	tcp_put_md5sig_pool();
715 	return 0;
716 
717 clear_hash:
718 	tcp_put_md5sig_pool();
719 clear_hash_noput:
720 	memset(md5_hash, 0, 16);
721 	return 1;
722 }
723 
724 static int tcp_v6_md5_hash_skb(char *md5_hash,
725 			       const struct tcp_md5sig_key *key,
726 			       const struct sock *sk,
727 			       const struct sk_buff *skb)
728 {
729 	const struct in6_addr *saddr, *daddr;
730 	struct tcp_md5sig_pool *hp;
731 	struct ahash_request *req;
732 	const struct tcphdr *th = tcp_hdr(skb);
733 
734 	if (sk) { /* valid for establish/request sockets */
735 		saddr = &sk->sk_v6_rcv_saddr;
736 		daddr = &sk->sk_v6_daddr;
737 	} else {
738 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
739 		saddr = &ip6h->saddr;
740 		daddr = &ip6h->daddr;
741 	}
742 
743 	hp = tcp_get_md5sig_pool();
744 	if (!hp)
745 		goto clear_hash_noput;
746 	req = hp->md5_req;
747 
748 	if (crypto_ahash_init(req))
749 		goto clear_hash;
750 
751 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
752 		goto clear_hash;
753 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
754 		goto clear_hash;
755 	if (tcp_md5_hash_key(hp, key))
756 		goto clear_hash;
757 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
758 	if (crypto_ahash_final(req))
759 		goto clear_hash;
760 
761 	tcp_put_md5sig_pool();
762 	return 0;
763 
764 clear_hash:
765 	tcp_put_md5sig_pool();
766 clear_hash_noput:
767 	memset(md5_hash, 0, 16);
768 	return 1;
769 }
770 
771 #endif
772 
773 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
774 				    const struct sk_buff *skb,
775 				    int dif, int sdif)
776 {
777 #ifdef CONFIG_TCP_MD5SIG
778 	const __u8 *hash_location = NULL;
779 	struct tcp_md5sig_key *hash_expected;
780 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
781 	const struct tcphdr *th = tcp_hdr(skb);
782 	int genhash, l3index;
783 	u8 newhash[16];
784 
785 	/* sdif set, means packet ingressed via a device
786 	 * in an L3 domain and dif is set to the l3mdev
787 	 */
788 	l3index = sdif ? dif : 0;
789 
790 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
791 	hash_location = tcp_parse_md5sig_option(th);
792 
793 	/* We've parsed the options - do we have a hash? */
794 	if (!hash_expected && !hash_location)
795 		return false;
796 
797 	if (hash_expected && !hash_location) {
798 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
799 		return true;
800 	}
801 
802 	if (!hash_expected && hash_location) {
803 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
804 		return true;
805 	}
806 
807 	/* check the signature */
808 	genhash = tcp_v6_md5_hash_skb(newhash,
809 				      hash_expected,
810 				      NULL, skb);
811 
812 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
813 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
814 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
815 				     genhash ? "failed" : "mismatch",
816 				     &ip6h->saddr, ntohs(th->source),
817 				     &ip6h->daddr, ntohs(th->dest), l3index);
818 		return true;
819 	}
820 #endif
821 	return false;
822 }
823 
824 static void tcp_v6_init_req(struct request_sock *req,
825 			    const struct sock *sk_listener,
826 			    struct sk_buff *skb)
827 {
828 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
829 	struct inet_request_sock *ireq = inet_rsk(req);
830 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
831 
832 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
833 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
834 
835 	/* So that link locals have meaning */
836 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
837 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
838 		ireq->ir_iif = tcp_v6_iif(skb);
839 
840 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
841 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
842 	     np->rxopt.bits.rxinfo ||
843 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
844 	     np->rxopt.bits.rxohlim || np->repflow)) {
845 		refcount_inc(&skb->users);
846 		ireq->pktopts = skb;
847 	}
848 }
849 
850 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
851 					  struct flowi *fl,
852 					  const struct request_sock *req)
853 {
854 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
855 }
856 
857 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
858 	.family		=	AF_INET6,
859 	.obj_size	=	sizeof(struct tcp6_request_sock),
860 	.rtx_syn_ack	=	tcp_rtx_synack,
861 	.send_ack	=	tcp_v6_reqsk_send_ack,
862 	.destructor	=	tcp_v6_reqsk_destructor,
863 	.send_reset	=	tcp_v6_send_reset,
864 	.syn_ack_timeout =	tcp_syn_ack_timeout,
865 };
866 
867 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
868 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
869 				sizeof(struct ipv6hdr),
870 #ifdef CONFIG_TCP_MD5SIG
871 	.req_md5_lookup	=	tcp_v6_md5_lookup,
872 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
873 #endif
874 	.init_req	=	tcp_v6_init_req,
875 #ifdef CONFIG_SYN_COOKIES
876 	.cookie_init_seq =	cookie_v6_init_sequence,
877 #endif
878 	.route_req	=	tcp_v6_route_req,
879 	.init_seq	=	tcp_v6_init_seq,
880 	.init_ts_off	=	tcp_v6_init_ts_off,
881 	.send_synack	=	tcp_v6_send_synack,
882 };
883 
884 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
885 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
886 				 int oif, struct tcp_md5sig_key *key, int rst,
887 				 u8 tclass, __be32 label, u32 priority)
888 {
889 	const struct tcphdr *th = tcp_hdr(skb);
890 	struct tcphdr *t1;
891 	struct sk_buff *buff;
892 	struct flowi6 fl6;
893 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
894 	struct sock *ctl_sk = net->ipv6.tcp_sk;
895 	unsigned int tot_len = sizeof(struct tcphdr);
896 	struct dst_entry *dst;
897 	__be32 *topt;
898 	__u32 mark = 0;
899 
900 	if (tsecr)
901 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
902 #ifdef CONFIG_TCP_MD5SIG
903 	if (key)
904 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
905 #endif
906 
907 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
908 			 GFP_ATOMIC);
909 	if (!buff)
910 		return;
911 
912 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
913 
914 	t1 = skb_push(buff, tot_len);
915 	skb_reset_transport_header(buff);
916 
917 	/* Swap the send and the receive. */
918 	memset(t1, 0, sizeof(*t1));
919 	t1->dest = th->source;
920 	t1->source = th->dest;
921 	t1->doff = tot_len / 4;
922 	t1->seq = htonl(seq);
923 	t1->ack_seq = htonl(ack);
924 	t1->ack = !rst || !th->ack;
925 	t1->rst = rst;
926 	t1->window = htons(win);
927 
928 	topt = (__be32 *)(t1 + 1);
929 
930 	if (tsecr) {
931 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
932 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
933 		*topt++ = htonl(tsval);
934 		*topt++ = htonl(tsecr);
935 	}
936 
937 #ifdef CONFIG_TCP_MD5SIG
938 	if (key) {
939 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
940 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
941 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
942 				    &ipv6_hdr(skb)->saddr,
943 				    &ipv6_hdr(skb)->daddr, t1);
944 	}
945 #endif
946 
947 	memset(&fl6, 0, sizeof(fl6));
948 	fl6.daddr = ipv6_hdr(skb)->saddr;
949 	fl6.saddr = ipv6_hdr(skb)->daddr;
950 	fl6.flowlabel = label;
951 
952 	buff->ip_summed = CHECKSUM_PARTIAL;
953 	buff->csum = 0;
954 
955 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
956 
957 	fl6.flowi6_proto = IPPROTO_TCP;
958 	if (rt6_need_strict(&fl6.daddr) && !oif)
959 		fl6.flowi6_oif = tcp_v6_iif(skb);
960 	else {
961 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
962 			oif = skb->skb_iif;
963 
964 		fl6.flowi6_oif = oif;
965 	}
966 
967 	if (sk) {
968 		if (sk->sk_state == TCP_TIME_WAIT) {
969 			mark = inet_twsk(sk)->tw_mark;
970 			/* autoflowlabel relies on buff->hash */
971 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
972 				     PKT_HASH_TYPE_L4);
973 		} else {
974 			mark = sk->sk_mark;
975 		}
976 		buff->tstamp = tcp_transmit_time(sk);
977 	}
978 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
979 	fl6.fl6_dport = t1->dest;
980 	fl6.fl6_sport = t1->source;
981 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
982 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
983 
984 	/* Pass a socket to ip6_dst_lookup even if it is for an RST.
985 	 * The underlying function will use it to retrieve the network
986 	 * namespace.
987 	 */
988 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
989 	if (!IS_ERR(dst)) {
990 		skb_dst_set(buff, dst);
991 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
992 			 tclass & ~INET_ECN_MASK, priority);
993 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
994 		if (rst)
995 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
996 		return;
997 	}
998 
999 	kfree_skb(buff);
1000 }
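
/* Worked example of the option words built above: with TCPOPT_NOP = 1,
 * TCPOPT_TIMESTAMP = 8 and TCPOLEN_TIMESTAMP = 10, the first timestamp word is
 * htonl(0x0101080a), i.e. two NOP pad bytes followed by kind/length, then the
 * 32-bit TSval and TSecr values.
 */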
1001 
1002 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1003 {
1004 	const struct tcphdr *th = tcp_hdr(skb);
1005 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1006 	u32 seq = 0, ack_seq = 0;
1007 	struct tcp_md5sig_key *key = NULL;
1008 #ifdef CONFIG_TCP_MD5SIG
1009 	const __u8 *hash_location = NULL;
1010 	unsigned char newhash[16];
1011 	int genhash;
1012 	struct sock *sk1 = NULL;
1013 #endif
1014 	__be32 label = 0;
1015 	u32 priority = 0;
1016 	struct net *net;
1017 	int oif = 0;
1018 
1019 	if (th->rst)
1020 		return;
1021 
1022 	/* If sk is not NULL, it means we did a successful lookup and the
1023 	 * incoming route had to be correct. prequeue might have dropped our dst.
1024 	 */
1025 	if (!sk && !ipv6_unicast_destination(skb))
1026 		return;
1027 
1028 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1029 #ifdef CONFIG_TCP_MD5SIG
1030 	rcu_read_lock();
1031 	hash_location = tcp_parse_md5sig_option(th);
1032 	if (sk && sk_fullsock(sk)) {
1033 		int l3index;
1034 
1035 		/* sdif set, means packet ingressed via a device
1036 		 * in an L3 domain and inet_iif is set to it.
1037 		 */
1038 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1039 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1040 	} else if (hash_location) {
1041 		int dif = tcp_v6_iif_l3_slave(skb);
1042 		int sdif = tcp_v6_sdif(skb);
1043 		int l3index;
1044 
1045 		/*
1046 		 * The active side is lost. Try to find the listening socket
1047 		 * through the source port, and then find the md5 key through
1048 		 * the listening socket. We do not lose security here:
1049 		 * the incoming packet is checked against the md5 hash of the
1050 		 * found key; no RST is generated if the md5 hash doesn't match.
1051 		 */
1052 		sk1 = inet6_lookup_listener(net,
1053 					   &tcp_hashinfo, NULL, 0,
1054 					   &ipv6h->saddr,
1055 					   th->source, &ipv6h->daddr,
1056 					   ntohs(th->source), dif, sdif);
1057 		if (!sk1)
1058 			goto out;
1059 
1060 		/* sdif set, means packet ingressed via a device
1061 		 * in an L3 domain and dif is set to it.
1062 		 */
1063 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1064 
1065 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1066 		if (!key)
1067 			goto out;
1068 
1069 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1070 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1071 			goto out;
1072 	}
1073 #endif
1074 
1075 	if (th->ack)
1076 		seq = ntohl(th->ack_seq);
1077 	else
1078 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1079 			  (th->doff << 2);
1080 
1081 	if (sk) {
1082 		oif = sk->sk_bound_dev_if;
1083 		if (sk_fullsock(sk)) {
1084 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1085 
1086 			trace_tcp_send_reset(sk, skb);
1087 			if (np->repflow)
1088 				label = ip6_flowlabel(ipv6h);
1089 			priority = sk->sk_priority;
1090 		}
1091 		if (sk->sk_state == TCP_TIME_WAIT) {
1092 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1093 			priority = inet_twsk(sk)->tw_priority;
1094 		}
1095 	} else {
1096 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1097 			label = ip6_flowlabel(ipv6h);
1098 	}
1099 
1100 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1101 			     ipv6_get_dsfield(ipv6h), label, priority);
1102 
1103 #ifdef CONFIG_TCP_MD5SIG
1104 out:
1105 	rcu_read_unlock();
1106 #endif
1107 }
1108 
1109 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1110 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1111 			    struct tcp_md5sig_key *key, u8 tclass,
1112 			    __be32 label, u32 priority)
1113 {
1114 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1115 			     tclass, label, priority);
1116 }
1117 
1118 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1119 {
1120 	struct inet_timewait_sock *tw = inet_twsk(sk);
1121 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1122 
1123 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1124 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1125 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1126 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1127 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1128 
1129 	inet_twsk_put(tw);
1130 }
1131 
1132 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1133 				  struct request_sock *req)
1134 {
1135 	int l3index;
1136 
1137 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1138 
1139 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1140 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1141 	 */
1142 	/* RFC 7323 2.3
1143 	 * The window field (SEG.WND) of every outgoing segment, with the
1144 	 * exception of <SYN> segments, MUST be right-shifted by
1145 	 * Rcv.Wind.Shift bits:
1146 	 */
1147 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1148 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1149 			tcp_rsk(req)->rcv_nxt,
1150 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1151 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1152 			req->ts_recent, sk->sk_bound_dev_if,
1153 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1154 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1155 }
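
/* Worked example of the window scaling above: with req->rsk_rcv_wnd = 65536
 * and rcv_wscale = 7, the advertised SEG.WND is 65536 >> 7 = 512.
 */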
1156 
1157 
1158 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1159 {
1160 #ifdef CONFIG_SYN_COOKIES
1161 	const struct tcphdr *th = tcp_hdr(skb);
1162 
1163 	if (!th->syn)
1164 		sk = cookie_v6_check(sk, skb);
1165 #endif
1166 	return sk;
1167 }
1168 
1169 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1170 			 struct tcphdr *th, u32 *cookie)
1171 {
1172 	u16 mss = 0;
1173 #ifdef CONFIG_SYN_COOKIES
1174 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1175 				    &tcp_request_sock_ipv6_ops, sk, th);
1176 	if (mss) {
1177 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1178 		tcp_synq_overflow(sk);
1179 	}
1180 #endif
1181 	return mss;
1182 }
1183 
1184 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1185 {
1186 	if (skb->protocol == htons(ETH_P_IP))
1187 		return tcp_v4_conn_request(sk, skb);
1188 
1189 	if (!ipv6_unicast_destination(skb))
1190 		goto drop;
1191 
1192 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1193 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1194 		return 0;
1195 	}
1196 
1197 	return tcp_conn_request(&tcp6_request_sock_ops,
1198 				&tcp_request_sock_ipv6_ops, sk, skb);
1199 
1200 drop:
1201 	tcp_listendrop(sk);
1202 	return 0; /* don't send reset */
1203 }
1204 
1205 static void tcp_v6_restore_cb(struct sk_buff *skb)
1206 {
1207 	/* We need to move header back to the beginning if xfrm6_policy_check()
1208 	 * and tcp_v6_fill_cb() are going to be called again.
1209 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1210 	 */
1211 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1212 		sizeof(struct inet6_skb_parm));
1213 }
1214 
1215 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1216 					 struct request_sock *req,
1217 					 struct dst_entry *dst,
1218 					 struct request_sock *req_unhash,
1219 					 bool *own_req)
1220 {
1221 	struct inet_request_sock *ireq;
1222 	struct ipv6_pinfo *newnp;
1223 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1224 	struct ipv6_txoptions *opt;
1225 	struct inet_sock *newinet;
1226 	bool found_dup_sk = false;
1227 	struct tcp_sock *newtp;
1228 	struct sock *newsk;
1229 #ifdef CONFIG_TCP_MD5SIG
1230 	struct tcp_md5sig_key *key;
1231 	int l3index;
1232 #endif
1233 	struct flowi6 fl6;
1234 
1235 	if (skb->protocol == htons(ETH_P_IP)) {
1236 		/*
1237 		 *	v6 mapped
1238 		 */
1239 
1240 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1241 					     req_unhash, own_req);
1242 
1243 		if (!newsk)
1244 			return NULL;
1245 
1246 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1247 
1248 		newinet = inet_sk(newsk);
1249 		newnp = tcp_inet6_sk(newsk);
1250 		newtp = tcp_sk(newsk);
1251 
1252 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1253 
1254 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1255 
1256 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1257 		if (sk_is_mptcp(newsk))
1258 			mptcpv6_handle_mapped(newsk, true);
1259 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1260 #ifdef CONFIG_TCP_MD5SIG
1261 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1262 #endif
1263 
1264 		newnp->ipv6_mc_list = NULL;
1265 		newnp->ipv6_ac_list = NULL;
1266 		newnp->ipv6_fl_list = NULL;
1267 		newnp->pktoptions  = NULL;
1268 		newnp->opt	   = NULL;
1269 		newnp->mcast_oif   = inet_iif(skb);
1270 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1271 		newnp->rcv_flowinfo = 0;
1272 		if (np->repflow)
1273 			newnp->flow_label = 0;
1274 
1275 		/*
1276 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1277 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1278 		 * that function for the gory details. -acme
1279 		 */
1280 
1281 		/* This is a tricky place. Until this moment the IPv4 tcp
1282 		   socket worked with the IPv6 icsk.icsk_af_ops.
1283 		   Sync it now.
1284 		 */
1285 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1286 
1287 		return newsk;
1288 	}
1289 
1290 	ireq = inet_rsk(req);
1291 
1292 	if (sk_acceptq_is_full(sk))
1293 		goto out_overflow;
1294 
1295 	if (!dst) {
1296 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1297 		if (!dst)
1298 			goto out;
1299 	}
1300 
1301 	newsk = tcp_create_openreq_child(sk, req, skb);
1302 	if (!newsk)
1303 		goto out_nonewsk;
1304 
1305 	/*
1306 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1307 	 * count here, tcp_create_openreq_child now does this for us, see the
1308 	 * comment in that function for the gory details. -acme
1309 	 */
1310 
1311 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1312 	ip6_dst_store(newsk, dst, NULL, NULL);
1313 	inet6_sk_rx_dst_set(newsk, skb);
1314 
1315 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1316 
1317 	newtp = tcp_sk(newsk);
1318 	newinet = inet_sk(newsk);
1319 	newnp = tcp_inet6_sk(newsk);
1320 
1321 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1322 
1323 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1324 	newnp->saddr = ireq->ir_v6_loc_addr;
1325 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1326 	newsk->sk_bound_dev_if = ireq->ir_iif;
1327 
1328 	/* Now IPv6 options...
1329 
1330 	   First: no IPv4 options.
1331 	 */
1332 	newinet->inet_opt = NULL;
1333 	newnp->ipv6_mc_list = NULL;
1334 	newnp->ipv6_ac_list = NULL;
1335 	newnp->ipv6_fl_list = NULL;
1336 
1337 	/* Clone RX bits */
1338 	newnp->rxopt.all = np->rxopt.all;
1339 
1340 	newnp->pktoptions = NULL;
1341 	newnp->opt	  = NULL;
1342 	newnp->mcast_oif  = tcp_v6_iif(skb);
1343 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1344 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1345 	if (np->repflow)
1346 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1347 
1348 	/* Set ToS of the new socket based upon the value of incoming SYN.
1349 	 * ECT bits are set later in tcp_init_transfer().
1350 	 */
1351 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1352 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1353 
1354 	/* Clone native IPv6 options from the listening socket (if any).
1355 
1356 	   Yes, keeping a reference count would be much more clever,
1357 	   but we do one more thing here: reattach optmem
1358 	   to newsk.
1359 	 */
1360 	opt = ireq->ipv6_opt;
1361 	if (!opt)
1362 		opt = rcu_dereference(np->opt);
1363 	if (opt) {
1364 		opt = ipv6_dup_options(newsk, opt);
1365 		RCU_INIT_POINTER(newnp->opt, opt);
1366 	}
1367 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1368 	if (opt)
1369 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1370 						    opt->opt_flen;
1371 
1372 	tcp_ca_openreq_child(newsk, dst);
1373 
1374 	tcp_sync_mss(newsk, dst_mtu(dst));
1375 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1376 
1377 	tcp_initialize_rcv_mss(newsk);
1378 
1379 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1380 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1381 
1382 #ifdef CONFIG_TCP_MD5SIG
1383 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1384 
1385 	/* Copy over the MD5 key from the original socket */
1386 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1387 	if (key) {
1388 		/* We're using one, so create a matching key
1389 		 * on the newsk structure. If we fail to get
1390 		 * memory, then we end up not copying the key
1391 		 * across. Shucks.
1392 		 */
1393 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1394 			       AF_INET6, 128, l3index, key->key, key->keylen,
1395 			       sk_gfp_mask(sk, GFP_ATOMIC));
1396 	}
1397 #endif
1398 
1399 	if (__inet_inherit_port(sk, newsk) < 0) {
1400 		inet_csk_prepare_forced_close(newsk);
1401 		tcp_done(newsk);
1402 		goto out;
1403 	}
1404 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1405 				       &found_dup_sk);
1406 	if (*own_req) {
1407 		tcp_move_syn(newtp, req);
1408 
1409 		/* Clone pktoptions received with SYN, if we own the req */
1410 		if (ireq->pktopts) {
1411 			newnp->pktoptions = skb_clone(ireq->pktopts,
1412 						      sk_gfp_mask(sk, GFP_ATOMIC));
1413 			consume_skb(ireq->pktopts);
1414 			ireq->pktopts = NULL;
1415 			if (newnp->pktoptions) {
1416 				tcp_v6_restore_cb(newnp->pktoptions);
1417 				skb_set_owner_r(newnp->pktoptions, newsk);
1418 			}
1419 		}
1420 	} else {
1421 		if (!req_unhash && found_dup_sk) {
1422 			/* This code path should be executed only in the
1423 			 * syncookie case
1424 			 */
1425 			bh_unlock_sock(newsk);
1426 			sock_put(newsk);
1427 			newsk = NULL;
1428 		}
1429 	}
1430 
1431 	return newsk;
1432 
1433 out_overflow:
1434 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1435 out_nonewsk:
1436 	dst_release(dst);
1437 out:
1438 	tcp_listendrop(sk);
1439 	return NULL;
1440 }
1441 
1442 /* The socket must have its spinlock held when we get
1443  * here, unless it is a TCP_LISTEN socket.
1444  *
1445  * We have a potential double-lock case here, so even when
1446  * doing backlog processing we use the BH locking scheme.
1447  * This is because we cannot sleep with the original spinlock
1448  * held.
1449  */
1450 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1451 {
1452 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1453 	struct sk_buff *opt_skb = NULL;
1454 	struct tcp_sock *tp;
1455 
1456 	/* Imagine: the socket is IPv6. An IPv4 packet arrives,
1457 	   goes to the IPv4 receive handler and is backlogged.
1458 	   From the backlog it always ends up here. Kerboom...
1459 	   Fortunately, tcp_rcv_established and rcv_established
1460 	   handle them correctly, but that is not the case with
1461 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1462 	 */
1463 
1464 	if (skb->protocol == htons(ETH_P_IP))
1465 		return tcp_v4_do_rcv(sk, skb);
1466 
1467 	/*
1468 	 *	socket locking is here for SMP purposes as backlog rcv
1469 	 *	is currently called with bh processing disabled.
1470 	 */
1471 
1472 	/* Do Stevens' IPV6_PKTOPTIONS.
1473 
1474 	   Yes, guys, it is the only place in our code where we
1475 	   may do it without affecting IPv4.
1476 	   The rest of the code is protocol independent,
1477 	   and I do not like the idea of uglifying IPv4.
1478 
1479 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1480 	   does not look very well thought out. For now we latch
1481 	   the options received in the last packet enqueued
1482 	   by tcp. Feel free to propose a better solution.
1483 					       --ANK (980728)
1484 	 */
1485 	if (np->rxopt.all)
1486 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1487 
1488 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1489 		struct dst_entry *dst;
1490 
1491 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1492 						lockdep_sock_is_held(sk));
1493 
1494 		sock_rps_save_rxhash(sk, skb);
1495 		sk_mark_napi_id(sk, skb);
1496 		if (dst) {
1497 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1498 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1499 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1500 				dst_release(dst);
1501 			}
1502 		}
1503 
1504 		tcp_rcv_established(sk, skb);
1505 		if (opt_skb)
1506 			goto ipv6_pktoptions;
1507 		return 0;
1508 	}
1509 
1510 	if (tcp_checksum_complete(skb))
1511 		goto csum_err;
1512 
1513 	if (sk->sk_state == TCP_LISTEN) {
1514 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1515 
1516 		if (!nsk)
1517 			goto discard;
1518 
1519 		if (nsk != sk) {
1520 			if (tcp_child_process(sk, nsk, skb))
1521 				goto reset;
1522 			if (opt_skb)
1523 				__kfree_skb(opt_skb);
1524 			return 0;
1525 		}
1526 	} else
1527 		sock_rps_save_rxhash(sk, skb);
1528 
1529 	if (tcp_rcv_state_process(sk, skb))
1530 		goto reset;
1531 	if (opt_skb)
1532 		goto ipv6_pktoptions;
1533 	return 0;
1534 
1535 reset:
1536 	tcp_v6_send_reset(sk, skb);
1537 discard:
1538 	if (opt_skb)
1539 		__kfree_skb(opt_skb);
1540 	kfree_skb(skb);
1541 	return 0;
1542 csum_err:
1543 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1544 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1545 	goto discard;
1546 
1547 
1548 ipv6_pktoptions:
1549 	/* You may ask: what is this? It means that:
1550 
1551 	   1. skb was enqueued by tcp.
1552 	   2. skb is added to the tail of the read queue, rather than out of order.
1553 	   3. the socket is not in a passive state.
1554 	   4. Finally, it really contains options which the user wants to receive.
1555 	 */
1556 	tp = tcp_sk(sk);
1557 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1558 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1559 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1560 			np->mcast_oif = tcp_v6_iif(opt_skb);
1561 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1562 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1563 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1564 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1565 		if (np->repflow)
1566 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1567 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1568 			skb_set_owner_r(opt_skb, sk);
1569 			tcp_v6_restore_cb(opt_skb);
1570 			opt_skb = xchg(&np->pktoptions, opt_skb);
1571 		} else {
1572 			__kfree_skb(opt_skb);
1573 			opt_skb = xchg(&np->pktoptions, NULL);
1574 		}
1575 	}
1576 
1577 	kfree_skb(opt_skb);
1578 	return 0;
1579 }
1580 
1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582 			   const struct tcphdr *th)
1583 {
1584 	/* This is tricky: we move IP6CB at its correct location into
1585 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1586 	 * _decode_session6() uses IP6CB().
1587 	 * barrier() makes sure compiler won't play aliasing games.
1588 	 */
1589 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1590 		sizeof(struct inet6_skb_parm));
1591 	barrier();
1592 
1593 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1594 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595 				    skb->len - th->doff*4);
1596 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600 	TCP_SKB_CB(skb)->sacked = 0;
1601 	TCP_SKB_CB(skb)->has_rxtstamp =
1602 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1603 }
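
/* Worked example of the end_seq computation above: for a segment with
 * seq = 1000, no SYN or FIN, a 20-byte TCP header (doff = 5) and
 * skb->len = 120, end_seq = 1000 + 0 + 0 + 120 - 20 = 1100, i.e. seq plus the
 * payload length (SYN and FIN each consume one extra sequence number).
 */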
1604 
1605 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1606 {
1607 	struct sk_buff *skb_to_free;
1608 	int sdif = inet6_sdif(skb);
1609 	int dif = inet6_iif(skb);
1610 	const struct tcphdr *th;
1611 	const struct ipv6hdr *hdr;
1612 	bool refcounted;
1613 	struct sock *sk;
1614 	int ret;
1615 	struct net *net = dev_net(skb->dev);
1616 
1617 	if (skb->pkt_type != PACKET_HOST)
1618 		goto discard_it;
1619 
1620 	/*
1621 	 *	Count it even if it's bad.
1622 	 */
1623 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1624 
1625 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1626 		goto discard_it;
1627 
1628 	th = (const struct tcphdr *)skb->data;
1629 
1630 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1631 		goto bad_packet;
1632 	if (!pskb_may_pull(skb, th->doff*4))
1633 		goto discard_it;
1634 
1635 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1636 		goto csum_error;
1637 
1638 	th = (const struct tcphdr *)skb->data;
1639 	hdr = ipv6_hdr(skb);
1640 
1641 lookup:
1642 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1643 				th->source, th->dest, inet6_iif(skb), sdif,
1644 				&refcounted);
1645 	if (!sk)
1646 		goto no_tcp_socket;
1647 
1648 process:
1649 	if (sk->sk_state == TCP_TIME_WAIT)
1650 		goto do_time_wait;
1651 
1652 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1653 		struct request_sock *req = inet_reqsk(sk);
1654 		bool req_stolen = false;
1655 		struct sock *nsk;
1656 
1657 		sk = req->rsk_listener;
1658 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1659 			sk_drops_add(sk, skb);
1660 			reqsk_put(req);
1661 			goto discard_it;
1662 		}
1663 		if (tcp_checksum_complete(skb)) {
1664 			reqsk_put(req);
1665 			goto csum_error;
1666 		}
1667 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1668 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1669 			goto lookup;
1670 		}
1671 		sock_hold(sk);
1672 		refcounted = true;
1673 		nsk = NULL;
1674 		if (!tcp_filter(sk, skb)) {
1675 			th = (const struct tcphdr *)skb->data;
1676 			hdr = ipv6_hdr(skb);
1677 			tcp_v6_fill_cb(skb, hdr, th);
1678 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1679 		}
1680 		if (!nsk) {
1681 			reqsk_put(req);
1682 			if (req_stolen) {
1683 				/* Another cpu got exclusive access to req
1684 				 * and created a full blown socket.
1685 				 * Try to feed this packet to this socket
1686 				 * instead of discarding it.
1687 				 */
1688 				tcp_v6_restore_cb(skb);
1689 				sock_put(sk);
1690 				goto lookup;
1691 			}
1692 			goto discard_and_relse;
1693 		}
1694 		if (nsk == sk) {
1695 			reqsk_put(req);
1696 			tcp_v6_restore_cb(skb);
1697 		} else if (tcp_child_process(sk, nsk, skb)) {
1698 			tcp_v6_send_reset(nsk, skb);
1699 			goto discard_and_relse;
1700 		} else {
1701 			sock_put(sk);
1702 			return 0;
1703 		}
1704 	}
1705 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1706 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707 		goto discard_and_relse;
1708 	}
1709 
1710 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1711 		goto discard_and_relse;
1712 
1713 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1714 		goto discard_and_relse;
1715 
1716 	if (tcp_filter(sk, skb))
1717 		goto discard_and_relse;
1718 	th = (const struct tcphdr *)skb->data;
1719 	hdr = ipv6_hdr(skb);
1720 	tcp_v6_fill_cb(skb, hdr, th);
1721 
1722 	skb->dev = NULL;
1723 
1724 	if (sk->sk_state == TCP_LISTEN) {
1725 		ret = tcp_v6_do_rcv(sk, skb);
1726 		goto put_and_return;
1727 	}
1728 
1729 	sk_incoming_cpu_update(sk);
1730 
1731 	bh_lock_sock_nested(sk);
1732 	tcp_segs_in(tcp_sk(sk), skb);
1733 	ret = 0;
1734 	if (!sock_owned_by_user(sk)) {
1735 		skb_to_free = sk->sk_rx_skb_cache;
1736 		sk->sk_rx_skb_cache = NULL;
1737 		ret = tcp_v6_do_rcv(sk, skb);
1738 	} else {
1739 		if (tcp_add_backlog(sk, skb))
1740 			goto discard_and_relse;
1741 		skb_to_free = NULL;
1742 	}
1743 	bh_unlock_sock(sk);
1744 	if (skb_to_free)
1745 		__kfree_skb(skb_to_free);
1746 put_and_return:
1747 	if (refcounted)
1748 		sock_put(sk);
1749 	return ret ? -1 : 0;
1750 
1751 no_tcp_socket:
1752 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1753 		goto discard_it;
1754 
1755 	tcp_v6_fill_cb(skb, hdr, th);
1756 
1757 	if (tcp_checksum_complete(skb)) {
1758 csum_error:
1759 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1760 bad_packet:
1761 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1762 	} else {
1763 		tcp_v6_send_reset(NULL, skb);
1764 	}
1765 
1766 discard_it:
1767 	kfree_skb(skb);
1768 	return 0;
1769 
1770 discard_and_relse:
1771 	sk_drops_add(sk, skb);
1772 	if (refcounted)
1773 		sock_put(sk);
1774 	goto discard_it;
1775 
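	/*
	 * TIME_WAIT handling: after policy and checksum checks,
	 * tcp_timewait_state_process() classifies the segment.  A valid new
	 * SYN reusing the old 4-tuple (TCP_TW_SYN) kills the timewait sock
	 * and restarts processing on a matching listener; otherwise we ACK,
	 * send a RST, or silently drop.
	 */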
1776 do_time_wait:
1777 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1778 		inet_twsk_put(inet_twsk(sk));
1779 		goto discard_it;
1780 	}
1781 
1782 	tcp_v6_fill_cb(skb, hdr, th);
1783 
1784 	if (tcp_checksum_complete(skb)) {
1785 		inet_twsk_put(inet_twsk(sk));
1786 		goto csum_error;
1787 	}
1788 
1789 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1790 	case TCP_TW_SYN:
1791 	{
1792 		struct sock *sk2;
1793 
1794 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1795 					    skb, __tcp_hdrlen(th),
1796 					    &ipv6_hdr(skb)->saddr, th->source,
1797 					    &ipv6_hdr(skb)->daddr,
1798 					    ntohs(th->dest),
1799 					    tcp_v6_iif_l3_slave(skb),
1800 					    sdif);
1801 		if (sk2) {
1802 			struct inet_timewait_sock *tw = inet_twsk(sk);
1803 			inet_twsk_deschedule_put(tw);
1804 			sk = sk2;
1805 			tcp_v6_restore_cb(skb);
1806 			refcounted = false;
1807 			goto process;
1808 		}
1809 	}
1810 		/* to ACK */
1811 		fallthrough;
1812 	case TCP_TW_ACK:
1813 		tcp_v6_timewait_ack(sk, skb);
1814 		break;
1815 	case TCP_TW_RST:
1816 		tcp_v6_send_reset(sk, skb);
1817 		inet_twsk_deschedule_put(inet_twsk(sk));
1818 		goto discard_it;
1819 	case TCP_TW_SUCCESS:
1820 		;
1821 	}
1822 	goto discard_it;
1823 }
1824 
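/* Early demux: called from the IPv6 receive path before the routing
 * decision.  A hit in the established hash attaches the socket and, when
 * still valid, its cached rx dst to the skb, so the full lookup and route
 * decision later in tcp_v6_rcv() are avoided or cheapened.
 */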
1825 void tcp_v6_early_demux(struct sk_buff *skb)
1826 {
1827 	const struct ipv6hdr *hdr;
1828 	const struct tcphdr *th;
1829 	struct sock *sk;
1830 
1831 	if (skb->pkt_type != PACKET_HOST)
1832 		return;
1833 
1834 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1835 		return;
1836 
1837 	hdr = ipv6_hdr(skb);
1838 	th = tcp_hdr(skb);
1839 
1840 	if (th->doff < sizeof(struct tcphdr) / 4)
1841 		return;
1842 
1843 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1844 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1845 					&hdr->saddr, th->source,
1846 					&hdr->daddr, ntohs(th->dest),
1847 					inet6_iif(skb), inet6_sdif(skb));
1848 	if (sk) {
1849 		skb->sk = sk;
1850 		skb->destructor = sock_edemux;
1851 		if (sk_fullsock(sk)) {
1852 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1853 
1854 			if (dst)
1855 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1856 			if (dst &&
1857 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1858 				skb_dst_set_noref(skb, dst);
1859 		}
1860 	}
1861 }
1862 
1863 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1864 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1865 	.twsk_unique	= tcp_twsk_unique,
1866 	.twsk_destructor = tcp_twsk_destructor,
1867 };
1868 
1869 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1870 {
1871 	struct ipv6_pinfo *np = inet6_sk(sk);
1872 
1873 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1874 }
1875 
1876 const struct inet_connection_sock_af_ops ipv6_specific = {
1877 	.queue_xmit	   = inet6_csk_xmit,
1878 	.send_check	   = tcp_v6_send_check,
1879 	.rebuild_header	   = inet6_sk_rebuild_header,
1880 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1881 	.conn_request	   = tcp_v6_conn_request,
1882 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1883 	.net_header_len	   = sizeof(struct ipv6hdr),
1884 	.net_frag_header_len = sizeof(struct frag_hdr),
1885 	.setsockopt	   = ipv6_setsockopt,
1886 	.getsockopt	   = ipv6_getsockopt,
1887 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1888 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1889 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1890 };
1891 
1892 #ifdef CONFIG_TCP_MD5SIG
1893 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1894 	.md5_lookup	=	tcp_v6_md5_lookup,
1895 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1896 	.md5_parse	=	tcp_v6_parse_md5_keys,
1897 };
1898 #endif
1899 
1900 /*
1901  *	TCP over IPv4 via INET6 API
1902  */
1903 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1904 	.queue_xmit	   = ip_queue_xmit,
1905 	.send_check	   = tcp_v4_send_check,
1906 	.rebuild_header	   = inet_sk_rebuild_header,
1907 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1908 	.conn_request	   = tcp_v6_conn_request,
1909 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1910 	.net_header_len	   = sizeof(struct iphdr),
1911 	.setsockopt	   = ipv6_setsockopt,
1912 	.getsockopt	   = ipv6_getsockopt,
1913 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1914 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1915 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1916 };
1917 
1918 #ifdef CONFIG_TCP_MD5SIG
1919 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1920 	.md5_lookup	=	tcp_v4_md5_lookup,
1921 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1922 	.md5_parse	=	tcp_v6_parse_md5_keys,
1923 };
1924 #endif
1925 
1926 /* NOTE: A lot of things set to zero explicitly by call to
1927  *       sk_alloc() so need not be done here.
1928  */
1929 static int tcp_v6_init_sock(struct sock *sk)
1930 {
1931 	struct inet_connection_sock *icsk = inet_csk(sk);
1932 
1933 	tcp_init_sock(sk);
1934 
1935 	icsk->icsk_af_ops = &ipv6_specific;
1936 
1937 #ifdef CONFIG_TCP_MD5SIG
1938 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1939 #endif
1940 
1941 	return 0;
1942 }
1943 
1944 static void tcp_v6_destroy_sock(struct sock *sk)
1945 {
1946 	tcp_v4_destroy_sock(sk);
1947 	inet6_destroy_sock(sk);
1948 }
1949 
1950 #ifdef CONFIG_PROC_FS
1951 /* Proc filesystem TCPv6 sock list dumping. */
1952 static void get_openreq6(struct seq_file *seq,
1953 			 const struct request_sock *req, int i)
1954 {
1955 	long ttd = req->rsk_timer.expires - jiffies;
1956 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1957 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1958 
1959 	if (ttd < 0)
1960 		ttd = 0;
1961 
1962 	seq_printf(seq,
1963 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1964 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1965 		   i,
1966 		   src->s6_addr32[0], src->s6_addr32[1],
1967 		   src->s6_addr32[2], src->s6_addr32[3],
1968 		   inet_rsk(req)->ir_num,
1969 		   dest->s6_addr32[0], dest->s6_addr32[1],
1970 		   dest->s6_addr32[2], dest->s6_addr32[3],
1971 		   ntohs(inet_rsk(req)->ir_rmt_port),
1972 		   TCP_SYN_RECV,
1973 		   0, 0, /* could print option size, but that is af dependent. */
1974 		   1,   /* timers active (only the expire timer) */
1975 		   jiffies_to_clock_t(ttd),
1976 		   req->num_timeout,
1977 		   from_kuid_munged(seq_user_ns(seq),
1978 				    sock_i_uid(req->rsk_listener)),
1979 		   0,  /* non standard timer */
1980 		   0, /* open_requests have no inode */
1981 		   0, req);
1982 }
1983 
1984 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1985 {
1986 	const struct in6_addr *dest, *src;
1987 	__u16 destp, srcp;
1988 	int timer_active;
1989 	unsigned long timer_expires;
1990 	const struct inet_sock *inet = inet_sk(sp);
1991 	const struct tcp_sock *tp = tcp_sk(sp);
1992 	const struct inet_connection_sock *icsk = inet_csk(sp);
1993 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1994 	int rx_queue;
1995 	int state;
1996 
1997 	dest  = &sp->sk_v6_daddr;
1998 	src   = &sp->sk_v6_rcv_saddr;
1999 	destp = ntohs(inet->inet_dport);
2000 	srcp  = ntohs(inet->inet_sport);
2001 
2002 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2003 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2004 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2005 		timer_active	= 1;
2006 		timer_expires	= icsk->icsk_timeout;
2007 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2008 		timer_active	= 4;
2009 		timer_expires	= icsk->icsk_timeout;
2010 	} else if (timer_pending(&sp->sk_timer)) {
2011 		timer_active	= 2;
2012 		timer_expires	= sp->sk_timer.expires;
2013 	} else {
2014 		timer_active	= 0;
2015 		timer_expires = jiffies;
2016 	}
2017 
2018 	state = inet_sk_state_load(sp);
2019 	if (state == TCP_LISTEN)
2020 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2021 	else
2022 		/* Because we don't lock the socket,
2023 		 * we might find a transient negative value.
2024 		 */
2025 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2026 				      READ_ONCE(tp->copied_seq), 0);
2027 
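	/*
	 * One row of /proc/net/tcp6.  Addresses are printed as four 32-bit
	 * hex words, ports in hex.  Fields beyond the header line
	 * (sl .. inode) are: refcount, socket pointer, rto, ato,
	 * (quick ack count << 1 | pingpong), cwnd and ssthresh (or the
	 * fastopen max queue length for listeners).
	 */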
2028 	seq_printf(seq,
2029 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2030 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2031 		   i,
2032 		   src->s6_addr32[0], src->s6_addr32[1],
2033 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2034 		   dest->s6_addr32[0], dest->s6_addr32[1],
2035 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2036 		   state,
2037 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2038 		   rx_queue,
2039 		   timer_active,
2040 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2041 		   icsk->icsk_retransmits,
2042 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2043 		   icsk->icsk_probes_out,
2044 		   sock_i_ino(sp),
2045 		   refcount_read(&sp->sk_refcnt), sp,
2046 		   jiffies_to_clock_t(icsk->icsk_rto),
2047 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2048 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2049 		   tp->snd_cwnd,
2050 		   state == TCP_LISTEN ?
2051 			fastopenq->max_qlen :
2052 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2053 		   );
2054 }
2055 
2056 static void get_timewait6_sock(struct seq_file *seq,
2057 			       struct inet_timewait_sock *tw, int i)
2058 {
2059 	long delta = tw->tw_timer.expires - jiffies;
2060 	const struct in6_addr *dest, *src;
2061 	__u16 destp, srcp;
2062 
2063 	dest = &tw->tw_v6_daddr;
2064 	src  = &tw->tw_v6_rcv_saddr;
2065 	destp = ntohs(tw->tw_dport);
2066 	srcp  = ntohs(tw->tw_sport);
2067 
2068 	seq_printf(seq,
2069 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2070 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2071 		   i,
2072 		   src->s6_addr32[0], src->s6_addr32[1],
2073 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2074 		   dest->s6_addr32[0], dest->s6_addr32[1],
2075 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2076 		   tw->tw_substate, 0, 0,
2077 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2078 		   refcount_read(&tw->tw_refcnt), tw);
2079 }
2080 
2081 static int tcp6_seq_show(struct seq_file *seq, void *v)
2082 {
2083 	struct tcp_iter_state *st;
2084 	struct sock *sk = v;
2085 
2086 	if (v == SEQ_START_TOKEN) {
2087 		seq_puts(seq,
2088 			 "  sl  "
2089 			 "local_address                         "
2090 			 "remote_address                        "
2091 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2092 			 "   uid  timeout inode\n");
2093 		goto out;
2094 	}
2095 	st = seq->private;
2096 
2097 	if (sk->sk_state == TCP_TIME_WAIT)
2098 		get_timewait6_sock(seq, v, st->num);
2099 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2100 		get_openreq6(seq, v, st->num);
2101 	else
2102 		get_tcp6_sock(seq, v, st->num);
2103 out:
2104 	return 0;
2105 }
2106 
2107 static const struct seq_operations tcp6_seq_ops = {
2108 	.show		= tcp6_seq_show,
2109 	.start		= tcp_seq_start,
2110 	.next		= tcp_seq_next,
2111 	.stop		= tcp_seq_stop,
2112 };
2113 
2114 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2115 	.family		= AF_INET6,
2116 };
2117 
2118 int __net_init tcp6_proc_init(struct net *net)
2119 {
2120 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2121 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2122 		return -ENOMEM;
2123 	return 0;
2124 }
2125 
2126 void tcp6_proc_exit(struct net *net)
2127 {
2128 	remove_proc_entry("tcp6", net->proc_net);
2129 }
2130 #endif
2131 
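/* Socket-layer glue for AF_INET6/IPPROTO_TCP.  Most handlers are the ones
 * shared with IPv4 TCP; the IPv6-specific pieces are connect/init/destroy,
 * the hash function and the backlog receive path (tcp_v6_do_rcv).
 */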
2132 struct proto tcpv6_prot = {
2133 	.name			= "TCPv6",
2134 	.owner			= THIS_MODULE,
2135 	.close			= tcp_close,
2136 	.pre_connect		= tcp_v6_pre_connect,
2137 	.connect		= tcp_v6_connect,
2138 	.disconnect		= tcp_disconnect,
2139 	.accept			= inet_csk_accept,
2140 	.ioctl			= tcp_ioctl,
2141 	.init			= tcp_v6_init_sock,
2142 	.destroy		= tcp_v6_destroy_sock,
2143 	.shutdown		= tcp_shutdown,
2144 	.setsockopt		= tcp_setsockopt,
2145 	.getsockopt		= tcp_getsockopt,
2146 	.keepalive		= tcp_set_keepalive,
2147 	.recvmsg		= tcp_recvmsg,
2148 	.sendmsg		= tcp_sendmsg,
2149 	.sendpage		= tcp_sendpage,
2150 	.backlog_rcv		= tcp_v6_do_rcv,
2151 	.release_cb		= tcp_release_cb,
2152 	.hash			= inet6_hash,
2153 	.unhash			= inet_unhash,
2154 	.get_port		= inet_csk_get_port,
2155 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2156 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2157 	.stream_memory_free	= tcp_stream_memory_free,
2158 	.sockets_allocated	= &tcp_sockets_allocated,
2159 	.memory_allocated	= &tcp_memory_allocated,
2160 	.memory_pressure	= &tcp_memory_pressure,
2161 	.orphan_count		= &tcp_orphan_count,
2162 	.sysctl_mem		= sysctl_tcp_mem,
2163 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2164 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2165 	.max_header		= MAX_TCP_HEADER,
2166 	.obj_size		= sizeof(struct tcp6_sock),
2167 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2168 	.twsk_prot		= &tcp6_timewait_sock_ops,
2169 	.rsk_prot		= &tcp6_request_sock_ops,
2170 	.h.hashinfo		= &tcp_hashinfo,
2171 	.no_autobind		= true,
2172 	.diag_destroy		= tcp_abort,
2173 };
2174 EXPORT_SYMBOL_GPL(tcpv6_prot);
2175 
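/* Registration glue: tcpv6_protocol hooks tcp_v6_rcv()/tcp_v6_err() into the
 * IPv6 protocol demux, tcpv6_protosw exposes SOCK_STREAM/IPPROTO_TCP through
 * the inet6 socket switch, and the pernet ops create the per-netns control
 * socket used for sending resets and timewait ACKs.
 */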
2176 static const struct inet6_protocol tcpv6_protocol = {
2177 	.handler	=	tcp_v6_rcv,
2178 	.err_handler	=	tcp_v6_err,
2179 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2180 };
2181 
2182 static struct inet_protosw tcpv6_protosw = {
2183 	.type		=	SOCK_STREAM,
2184 	.protocol	=	IPPROTO_TCP,
2185 	.prot		=	&tcpv6_prot,
2186 	.ops		=	&inet6_stream_ops,
2187 	.flags		=	INET_PROTOSW_PERMANENT |
2188 				INET_PROTOSW_ICSK,
2189 };
2190 
2191 static int __net_init tcpv6_net_init(struct net *net)
2192 {
2193 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2194 				    SOCK_RAW, IPPROTO_TCP, net);
2195 }
2196 
2197 static void __net_exit tcpv6_net_exit(struct net *net)
2198 {
2199 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2200 }
2201 
2202 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2203 {
2204 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2205 }
2206 
2207 static struct pernet_operations tcpv6_net_ops = {
2208 	.init	    = tcpv6_net_init,
2209 	.exit	    = tcpv6_net_exit,
2210 	.exit_batch = tcpv6_net_exit_batch,
2211 };
2212 
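/* Boot-time init: register the protocol handler, then the protosw entry,
 * then the pernet subsystem, and finally MPTCP-over-IPv6.  Each failure
 * unwinds the steps that already succeeded (see the out_* labels).
 */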
2213 int __init tcpv6_init(void)
2214 {
2215 	int ret;
2216 
2217 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2218 	if (ret)
2219 		goto out;
2220 
2221 	/* register inet6 protocol */
2222 	ret = inet6_register_protosw(&tcpv6_protosw);
2223 	if (ret)
2224 		goto out_tcpv6_protocol;
2225 
2226 	ret = register_pernet_subsys(&tcpv6_net_ops);
2227 	if (ret)
2228 		goto out_tcpv6_protosw;
2229 
2230 	ret = mptcpv6_init();
2231 	if (ret)
2232 		goto out_tcpv6_pernet_subsys;
2233 
2234 out:
2235 	return ret;
2236 
2237 out_tcpv6_pernet_subsys:
2238 	unregister_pernet_subsys(&tcpv6_net_ops);
2239 out_tcpv6_protosw:
2240 	inet6_unregister_protosw(&tcpv6_protosw);
2241 out_tcpv6_protocol:
2242 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2243 	goto out;
2244 }
2245 
2246 void tcpv6_exit(void)
2247 {
2248 	unregister_pernet_subsys(&tcpv6_net_ops);
2249 	inet6_unregister_protosw(&tcpv6_protosw);
2250 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2251 }
2252