1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
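/* Note: the fixed offset above assumes that struct ipv6_pinfo is the last
 * member of struct tcp6_sock, mirroring what inet6_sk_generic() computes
 * at run time from sk_prot->obj_size.
 */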
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		rcu_assign_pointer(sk->sk_rx_dst, dst);
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
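/* The two helpers above feed the .init_seq and .init_ts_off hooks of
 * tcp_request_sock_ipv6_ops below: the initial sequence number and the
 * timestamp offset are derived from the address/port 4-tuple plus a
 * per-boot secret (RFC 6528 style), so they are hard to predict yet
 * stable for a given flow.
 */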
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
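/* The BPF_CGROUP_INET6_CONNECT program invoked above may rewrite the
 * destination address and port in @uaddr before tcp_v6_connect() runs.
 */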
144 
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242 		if (sk_is_mptcp(sk))
243 			mptcpv6_handle_mapped(sk, true);
244 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248 
249 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250 
251 		if (err) {
252 			icsk->icsk_ext_hdr_len = exthdrlen;
253 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255 			if (sk_is_mptcp(sk))
256 				mptcpv6_handle_mapped(sk, false);
257 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
258 #ifdef CONFIG_TCP_MD5SIG
259 			tp->af_specific = &tcp_sock_ipv6_specific;
260 #endif
261 			goto failure;
262 		}
263 		np->saddr = sk->sk_v6_rcv_saddr;
264 
265 		return err;
266 	}
267 
268 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 		saddr = &sk->sk_v6_rcv_saddr;
270 
271 	fl6.flowi6_proto = IPPROTO_TCP;
272 	fl6.daddr = sk->sk_v6_daddr;
273 	fl6.saddr = saddr ? *saddr : np->saddr;
274 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
275 	fl6.flowi6_oif = sk->sk_bound_dev_if;
276 	fl6.flowi6_mark = sk->sk_mark;
277 	fl6.fl6_dport = usin->sin6_port;
278 	fl6.fl6_sport = inet->inet_sport;
279 	fl6.flowi6_uid = sk->sk_uid;
280 
281 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282 	final_p = fl6_update_dst(&fl6, opt, &final);
283 
284 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
285 
286 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
287 	if (IS_ERR(dst)) {
288 		err = PTR_ERR(dst);
289 		goto failure;
290 	}
291 
292 	if (!saddr) {
293 		saddr = &fl6.saddr;
294 		sk->sk_v6_rcv_saddr = *saddr;
295 	}
296 
297 	/* set the source address */
298 	np->saddr = *saddr;
299 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300 
301 	sk->sk_gso_type = SKB_GSO_TCPV6;
302 	ip6_dst_store(sk, dst, NULL, NULL);
303 
304 	icsk->icsk_ext_hdr_len = 0;
305 	if (opt)
306 		icsk->icsk_ext_hdr_len = opt->opt_flen +
307 					 opt->opt_nflen;
308 
309 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310 
311 	inet->inet_dport = usin->sin6_port;
312 
313 	tcp_set_state(sk, TCP_SYN_SENT);
314 	err = inet6_hash_connect(tcp_death_row, sk);
315 	if (err)
316 		goto late_failure;
317 
318 	sk_set_txhash(sk);
319 
320 	if (likely(!tp->repair)) {
321 		if (!tp->write_seq)
322 			WRITE_ONCE(tp->write_seq,
323 				   secure_tcpv6_seq(np->saddr.s6_addr32,
324 						    sk->sk_v6_daddr.s6_addr32,
325 						    inet->inet_sport,
326 						    inet->inet_dport));
327 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
328 						   np->saddr.s6_addr32,
329 						   sk->sk_v6_daddr.s6_addr32);
330 	}
331 
332 	if (tcp_fastopen_defer_connect(sk, &err))
333 		return err;
334 	if (err)
335 		goto late_failure;
336 
337 	err = tcp_connect(sk);
338 	if (err)
339 		goto late_failure;
340 
341 	return 0;
342 
343 late_failure:
344 	tcp_set_state(sk, TCP_CLOSE);
345 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
346 		inet_reset_saddr(sk);
347 failure:
348 	inet->inet_dport = 0;
349 	sk->sk_route_caps = 0;
350 	return err;
351 }
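/* For a v4-mapped destination, tcp_v6_connect() above hands the socket
 * over to tcp_v4_connect() after switching icsk_af_ops to ipv6_mapped;
 * if that call fails, the IPv6 operations are restored so the socket can
 * still be used for native IPv6 connects.
 */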
352 
353 static void tcp_v6_mtu_reduced(struct sock *sk)
354 {
355 	struct dst_entry *dst;
356 	u32 mtu;
357 
358 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359 		return;
360 
361 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362 
363 	/* Drop requests trying to increase our current mss.
364 	 * Check done in __ip6_rt_update_pmtu() is too late.
365 	 */
366 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367 		return;
368 
369 	dst = inet6_csk_update_pmtu(sk, mtu);
370 	if (!dst)
371 		return;
372 
373 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374 		tcp_sync_mss(sk, dst_mtu(dst));
375 		tcp_simple_retransmit(sk);
376 	}
377 }
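/* tcp_v6_mtu_reduced() is called directly from tcp_v6_err() when the
 * socket is not owned by user context; otherwise the work is deferred
 * via the TCP_MTU_REDUCED_DEFERRED flag and performed once the socket
 * lock is released (see tcp_release_cb()).
 */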
378 
379 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
380 		u8 type, u8 code, int offset, __be32 info)
381 {
382 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
383 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
384 	struct net *net = dev_net(skb->dev);
385 	struct request_sock *fastopen;
386 	struct ipv6_pinfo *np;
387 	struct tcp_sock *tp;
388 	__u32 seq, snd_una;
389 	struct sock *sk;
390 	bool fatal;
391 	int err;
392 
393 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
394 					&hdr->daddr, th->dest,
395 					&hdr->saddr, ntohs(th->source),
396 					skb->dev->ifindex, inet6_sdif(skb));
397 
398 	if (!sk) {
399 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
400 				  ICMP6_MIB_INERRORS);
401 		return -ENOENT;
402 	}
403 
404 	if (sk->sk_state == TCP_TIME_WAIT) {
405 		inet_twsk_put(inet_twsk(sk));
406 		return 0;
407 	}
408 	seq = ntohl(th->seq);
409 	fatal = icmpv6_err_convert(type, code, &err);
410 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
411 		tcp_req_err(sk, seq, fatal);
412 		return 0;
413 	}
414 
415 	bh_lock_sock(sk);
416 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
417 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
418 
419 	if (sk->sk_state == TCP_CLOSE)
420 		goto out;
421 
422 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
423 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424 		goto out;
425 	}
426 
427 	tp = tcp_sk(sk);
428 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
429 	fastopen = rcu_dereference(tp->fastopen_rsk);
430 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
431 	if (sk->sk_state != TCP_LISTEN &&
432 	    !between(seq, snd_una, tp->snd_nxt)) {
433 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
434 		goto out;
435 	}
436 
437 	np = tcp_inet6_sk(sk);
438 
439 	if (type == NDISC_REDIRECT) {
440 		if (!sock_owned_by_user(sk)) {
441 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
442 
443 			if (dst)
444 				dst->ops->redirect(dst, sk, skb);
445 		}
446 		goto out;
447 	}
448 
449 	if (type == ICMPV6_PKT_TOOBIG) {
450 		u32 mtu = ntohl(info);
451 
452 		/* We are not interested in TCP_LISTEN and open_requests
453 	 * (SYN-ACKs sent out by Linux are always < 576 bytes so
454 		 * they should go through unfragmented).
455 		 */
456 		if (sk->sk_state == TCP_LISTEN)
457 			goto out;
458 
459 		if (!ip6_sk_accept_pmtu(sk))
460 			goto out;
461 
462 		if (mtu < IPV6_MIN_MTU)
463 			goto out;
464 
465 		WRITE_ONCE(tp->mtu_info, mtu);
466 
467 		if (!sock_owned_by_user(sk))
468 			tcp_v6_mtu_reduced(sk);
469 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
470 					   &sk->sk_tsq_flags))
471 			sock_hold(sk);
472 		goto out;
473 	}
474 
475 
476 	/* Might be for a request_sock */
477 	switch (sk->sk_state) {
478 	case TCP_SYN_SENT:
479 	case TCP_SYN_RECV:
480 		/* Only in fast or simultaneous open. If a fast open socket is
481 		 * already accepted it is treated as a connected one below.
482 		 */
483 		if (fastopen && !fastopen->sk)
484 			break;
485 
486 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
487 
488 		if (!sock_owned_by_user(sk)) {
489 			sk->sk_err = err;
490 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
491 
492 			tcp_done(sk);
493 		} else
494 			sk->sk_err_soft = err;
495 		goto out;
496 	case TCP_LISTEN:
497 		break;
498 	default:
499 		/* check if this ICMP message allows revert of backoff.
500 		 * (see RFC 6069)
501 		 */
502 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
503 		    code == ICMPV6_NOROUTE)
504 			tcp_ld_RTO_revert(sk, seq);
505 	}
506 
507 	if (!sock_owned_by_user(sk) && np->recverr) {
508 		sk->sk_err = err;
509 		sk->sk_error_report(sk);
510 	} else
511 		sk->sk_err_soft = err;
512 
513 out:
514 	bh_unlock_sock(sk);
515 	sock_put(sk);
516 	return 0;
517 }
518 
519 
520 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
521 			      struct flowi *fl,
522 			      struct request_sock *req,
523 			      struct tcp_fastopen_cookie *foc,
524 			      enum tcp_synack_type synack_type,
525 			      struct sk_buff *syn_skb)
526 {
527 	struct inet_request_sock *ireq = inet_rsk(req);
528 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
529 	struct ipv6_txoptions *opt;
530 	struct flowi6 *fl6 = &fl->u.ip6;
531 	struct sk_buff *skb;
532 	int err = -ENOMEM;
533 	u8 tclass;
534 
535 	/* First, grab a route. */
536 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
537 					       IPPROTO_TCP)) == NULL)
538 		goto done;
539 
540 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
541 
542 	if (skb) {
543 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
544 				    &ireq->ir_v6_rmt_addr);
545 
546 		fl6->daddr = ireq->ir_v6_rmt_addr;
547 		if (np->repflow && ireq->pktopts)
548 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
549 
550 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
551 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
552 				(np->tclass & INET_ECN_MASK) :
553 				np->tclass;
554 
555 		if (!INET_ECN_is_capable(tclass) &&
556 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
557 			tclass |= INET_ECN_ECT_0;
558 
559 		rcu_read_lock();
560 		opt = ireq->ipv6_opt;
561 		if (!opt)
562 			opt = rcu_dereference(np->opt);
563 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
564 			       tclass, sk->sk_priority);
565 		rcu_read_unlock();
566 		err = net_xmit_eval(err);
567 	}
568 
569 done:
570 	return err;
571 }
572 
573 
574 static void tcp_v6_reqsk_destructor(struct request_sock *req)
575 {
576 	kfree(inet_rsk(req)->ipv6_opt);
577 	kfree_skb(inet_rsk(req)->pktopts);
578 }
579 
580 #ifdef CONFIG_TCP_MD5SIG
581 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
582 						   const struct in6_addr *addr,
583 						   int l3index)
584 {
585 	return tcp_md5_do_lookup(sk, l3index,
586 				 (union tcp_md5_addr *)addr, AF_INET6);
587 }
588 
589 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
590 						const struct sock *addr_sk)
591 {
592 	int l3index;
593 
594 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
595 						 addr_sk->sk_bound_dev_if);
596 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
597 				    l3index);
598 }
599 
600 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
601 				 sockptr_t optval, int optlen)
602 {
603 	struct tcp_md5sig cmd;
604 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
605 	int l3index = 0;
606 	u8 prefixlen;
607 
608 	if (optlen < sizeof(cmd))
609 		return -EINVAL;
610 
611 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612 		return -EFAULT;
613 
614 	if (sin6->sin6_family != AF_INET6)
615 		return -EINVAL;
616 
617 	if (optname == TCP_MD5SIG_EXT &&
618 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
619 		prefixlen = cmd.tcpm_prefixlen;
620 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
621 					prefixlen > 32))
622 			return -EINVAL;
623 	} else {
624 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
625 	}
626 
627 	if (optname == TCP_MD5SIG_EXT &&
628 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
629 		struct net_device *dev;
630 
631 		rcu_read_lock();
632 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
633 		if (dev && netif_is_l3_master(dev))
634 			l3index = dev->ifindex;
635 		rcu_read_unlock();
636 
637 		/* ok to reference set/not set outside of rcu;
638 		 * right now device MUST be an L3 master
639 		 */
640 		if (!dev || !l3index)
641 			return -EINVAL;
642 	}
643 
644 	if (!cmd.tcpm_keylen) {
645 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
646 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
647 					      AF_INET, prefixlen,
648 					      l3index);
649 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
650 				      AF_INET6, prefixlen, l3index);
651 	}
652 
653 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
654 		return -EINVAL;
655 
656 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
657 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
658 				      AF_INET, prefixlen, l3index,
659 				      cmd.tcpm_key, cmd.tcpm_keylen,
660 				      GFP_KERNEL);
661 
662 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663 			      AF_INET6, prefixlen, l3index,
664 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
665 }
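/* A minimal user-space sketch of configuring a key through this path
 * (illustrative only; the address and key below are made up):
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * With TCP_MD5SIG_EXT, tcpm_flags/tcpm_prefixlen/tcpm_ifindex select the
 * prefix match and the L3 master device handled above.
 */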
666 
667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 				   const struct in6_addr *daddr,
669 				   const struct in6_addr *saddr,
670 				   const struct tcphdr *th, int nbytes)
671 {
672 	struct tcp6_pseudohdr *bp;
673 	struct scatterlist sg;
674 	struct tcphdr *_th;
675 
676 	bp = hp->scratch;
677 	/* 1. TCP pseudo-header (RFC2460) */
678 	bp->saddr = *saddr;
679 	bp->daddr = *daddr;
680 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 	bp->len = cpu_to_be32(nbytes);
682 
683 	_th = (struct tcphdr *)(bp + 1);
684 	memcpy(_th, th, sizeof(*th));
685 	_th->check = 0;
686 
687 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 				sizeof(*bp) + sizeof(*th));
690 	return crypto_ahash_update(hp->md5_req);
691 }
692 
693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 			       const struct in6_addr *daddr, struct in6_addr *saddr,
695 			       const struct tcphdr *th)
696 {
697 	struct tcp_md5sig_pool *hp;
698 	struct ahash_request *req;
699 
700 	hp = tcp_get_md5sig_pool();
701 	if (!hp)
702 		goto clear_hash_noput;
703 	req = hp->md5_req;
704 
705 	if (crypto_ahash_init(req))
706 		goto clear_hash;
707 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 		goto clear_hash;
709 	if (tcp_md5_hash_key(hp, key))
710 		goto clear_hash;
711 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 	if (crypto_ahash_final(req))
713 		goto clear_hash;
714 
715 	tcp_put_md5sig_pool();
716 	return 0;
717 
718 clear_hash:
719 	tcp_put_md5sig_pool();
720 clear_hash_noput:
721 	memset(md5_hash, 0, 16);
722 	return 1;
723 }
724 
725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 			       const struct tcp_md5sig_key *key,
727 			       const struct sock *sk,
728 			       const struct sk_buff *skb)
729 {
730 	const struct in6_addr *saddr, *daddr;
731 	struct tcp_md5sig_pool *hp;
732 	struct ahash_request *req;
733 	const struct tcphdr *th = tcp_hdr(skb);
734 
735 	if (sk) { /* valid for establish/request sockets */
736 		saddr = &sk->sk_v6_rcv_saddr;
737 		daddr = &sk->sk_v6_daddr;
738 	} else {
739 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 		saddr = &ip6h->saddr;
741 		daddr = &ip6h->daddr;
742 	}
743 
744 	hp = tcp_get_md5sig_pool();
745 	if (!hp)
746 		goto clear_hash_noput;
747 	req = hp->md5_req;
748 
749 	if (crypto_ahash_init(req))
750 		goto clear_hash;
751 
752 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 		goto clear_hash;
754 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 		goto clear_hash;
756 	if (tcp_md5_hash_key(hp, key))
757 		goto clear_hash;
758 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 	if (crypto_ahash_final(req))
760 		goto clear_hash;
761 
762 	tcp_put_md5sig_pool();
763 	return 0;
764 
765 clear_hash:
766 	tcp_put_md5sig_pool();
767 clear_hash_noput:
768 	memset(md5_hash, 0, 16);
769 	return 1;
770 }
771 
772 #endif
773 
774 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
775 				    const struct sk_buff *skb,
776 				    int dif, int sdif)
777 {
778 #ifdef CONFIG_TCP_MD5SIG
779 	const __u8 *hash_location = NULL;
780 	struct tcp_md5sig_key *hash_expected;
781 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
782 	const struct tcphdr *th = tcp_hdr(skb);
783 	int genhash, l3index;
784 	u8 newhash[16];
785 
786 	/* sdif set, means packet ingressed via a device
787 	 * in an L3 domain and dif is set to the l3mdev
788 	 */
789 	l3index = sdif ? dif : 0;
790 
791 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
792 	hash_location = tcp_parse_md5sig_option(th);
793 
794 	/* We've parsed the options - do we have a hash? */
795 	if (!hash_expected && !hash_location)
796 		return false;
797 
798 	if (hash_expected && !hash_location) {
799 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
800 		return true;
801 	}
802 
803 	if (!hash_expected && hash_location) {
804 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
805 		return true;
806 	}
807 
808 	/* check the signature */
809 	genhash = tcp_v6_md5_hash_skb(newhash,
810 				      hash_expected,
811 				      NULL, skb);
812 
813 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
814 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
815 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
816 				     genhash ? "failed" : "mismatch",
817 				     &ip6h->saddr, ntohs(th->source),
818 				     &ip6h->daddr, ntohs(th->dest), l3index);
819 		return true;
820 	}
821 #endif
822 	return false;
823 }
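/* The helper above returns true (i.e. "drop this segment") when only one
 * side has MD5 configured, or when the computed digest does not match the
 * one carried in the TCP option; false means the segment is acceptable
 * from an MD5 point of view.
 */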
824 
825 static void tcp_v6_init_req(struct request_sock *req,
826 			    const struct sock *sk_listener,
827 			    struct sk_buff *skb)
828 {
829 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
830 	struct inet_request_sock *ireq = inet_rsk(req);
831 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
832 
833 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
834 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
835 
836 	/* So that link locals have meaning */
837 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
838 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
839 		ireq->ir_iif = tcp_v6_iif(skb);
840 
841 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
842 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
843 	     np->rxopt.bits.rxinfo ||
844 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
845 	     np->rxopt.bits.rxohlim || np->repflow)) {
846 		refcount_inc(&skb->users);
847 		ireq->pktopts = skb;
848 	}
849 }
850 
851 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
852 					  struct flowi *fl,
853 					  const struct request_sock *req)
854 {
855 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
856 }
857 
858 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
859 	.family		=	AF_INET6,
860 	.obj_size	=	sizeof(struct tcp6_request_sock),
861 	.rtx_syn_ack	=	tcp_rtx_synack,
862 	.send_ack	=	tcp_v6_reqsk_send_ack,
863 	.destructor	=	tcp_v6_reqsk_destructor,
864 	.send_reset	=	tcp_v6_send_reset,
865 	.syn_ack_timeout =	tcp_syn_ack_timeout,
866 };
867 
868 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
869 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
870 				sizeof(struct ipv6hdr),
871 #ifdef CONFIG_TCP_MD5SIG
872 	.req_md5_lookup	=	tcp_v6_md5_lookup,
873 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
874 #endif
875 	.init_req	=	tcp_v6_init_req,
876 #ifdef CONFIG_SYN_COOKIES
877 	.cookie_init_seq =	cookie_v6_init_sequence,
878 #endif
879 	.route_req	=	tcp_v6_route_req,
880 	.init_seq	=	tcp_v6_init_seq,
881 	.init_ts_off	=	tcp_v6_init_ts_off,
882 	.send_synack	=	tcp_v6_send_synack,
883 };
884 
885 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
886 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
887 				 int oif, struct tcp_md5sig_key *key, int rst,
888 				 u8 tclass, __be32 label, u32 priority)
889 {
890 	const struct tcphdr *th = tcp_hdr(skb);
891 	struct tcphdr *t1;
892 	struct sk_buff *buff;
893 	struct flowi6 fl6;
894 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
895 	struct sock *ctl_sk = net->ipv6.tcp_sk;
896 	unsigned int tot_len = sizeof(struct tcphdr);
897 	struct dst_entry *dst;
898 	__be32 *topt;
899 	__u32 mark = 0;
900 
901 	if (tsecr)
902 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
903 #ifdef CONFIG_TCP_MD5SIG
904 	if (key)
905 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
906 #endif
907 
908 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
909 			 GFP_ATOMIC);
910 	if (!buff)
911 		return;
912 
913 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
914 
915 	t1 = skb_push(buff, tot_len);
916 	skb_reset_transport_header(buff);
917 
918 	/* Swap the send and the receive. */
919 	memset(t1, 0, sizeof(*t1));
920 	t1->dest = th->source;
921 	t1->source = th->dest;
922 	t1->doff = tot_len / 4;
923 	t1->seq = htonl(seq);
924 	t1->ack_seq = htonl(ack);
925 	t1->ack = !rst || !th->ack;
926 	t1->rst = rst;
927 	t1->window = htons(win);
928 
929 	topt = (__be32 *)(t1 + 1);
930 
931 	if (tsecr) {
932 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
933 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
934 		*topt++ = htonl(tsval);
935 		*topt++ = htonl(tsecr);
936 	}
937 
938 #ifdef CONFIG_TCP_MD5SIG
939 	if (key) {
940 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
941 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
942 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
943 				    &ipv6_hdr(skb)->saddr,
944 				    &ipv6_hdr(skb)->daddr, t1);
945 	}
946 #endif
947 
948 	memset(&fl6, 0, sizeof(fl6));
949 	fl6.daddr = ipv6_hdr(skb)->saddr;
950 	fl6.saddr = ipv6_hdr(skb)->daddr;
951 	fl6.flowlabel = label;
952 
953 	buff->ip_summed = CHECKSUM_PARTIAL;
954 	buff->csum = 0;
955 
956 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
957 
958 	fl6.flowi6_proto = IPPROTO_TCP;
959 	if (rt6_need_strict(&fl6.daddr) && !oif)
960 		fl6.flowi6_oif = tcp_v6_iif(skb);
961 	else {
962 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
963 			oif = skb->skb_iif;
964 
965 		fl6.flowi6_oif = oif;
966 	}
967 
968 	if (sk) {
969 		if (sk->sk_state == TCP_TIME_WAIT) {
970 			mark = inet_twsk(sk)->tw_mark;
971 			/* autoflowlabel relies on buff->hash */
972 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
973 				     PKT_HASH_TYPE_L4);
974 		} else {
975 			mark = sk->sk_mark;
976 		}
977 		buff->tstamp = tcp_transmit_time(sk);
978 	}
979 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
980 	fl6.fl6_dport = t1->dest;
981 	fl6.fl6_sport = t1->source;
982 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
983 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
984 
985 	/* Pass a socket to ip6_dst_lookup even when it is for a RST.
986 	 * The underlying function will use it to retrieve the network
987 	 * namespace.
988 	 */
989 	if (sk && sk->sk_state != TCP_TIME_WAIT)
990 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
991 	else
992 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
993 	if (!IS_ERR(dst)) {
994 		skb_dst_set(buff, dst);
995 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
996 			 tclass & ~INET_ECN_MASK, priority);
997 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
998 		if (rst)
999 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1000 		return;
1001 	}
1002 
1003 	kfree_skb(buff);
1004 }
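/* tcp_v6_send_response() builds stand-alone control segments (RST or ACK)
 * and transmits them through the per-netns control socket
 * net->ipv6.tcp_sk; it is the common backend for tcp_v6_send_reset() and
 * tcp_v6_send_ack() below.
 */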
1005 
1006 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1007 {
1008 	const struct tcphdr *th = tcp_hdr(skb);
1009 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1010 	u32 seq = 0, ack_seq = 0;
1011 	struct tcp_md5sig_key *key = NULL;
1012 #ifdef CONFIG_TCP_MD5SIG
1013 	const __u8 *hash_location = NULL;
1014 	unsigned char newhash[16];
1015 	int genhash;
1016 	struct sock *sk1 = NULL;
1017 #endif
1018 	__be32 label = 0;
1019 	u32 priority = 0;
1020 	struct net *net;
1021 	int oif = 0;
1022 
1023 	if (th->rst)
1024 		return;
1025 
1026 	/* If sk not NULL, it means we did a successful lookup and incoming
1027 	 * route had to be correct. prequeue might have dropped our dst.
1028 	 */
1029 	if (!sk && !ipv6_unicast_destination(skb))
1030 		return;
1031 
1032 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1033 #ifdef CONFIG_TCP_MD5SIG
1034 	rcu_read_lock();
1035 	hash_location = tcp_parse_md5sig_option(th);
1036 	if (sk && sk_fullsock(sk)) {
1037 		int l3index;
1038 
1039 		/* sdif set, means packet ingressed via a device
1040 		 * in an L3 domain and inet_iif is set to it.
1041 		 */
1042 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1043 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1044 	} else if (hash_location) {
1045 		int dif = tcp_v6_iif_l3_slave(skb);
1046 		int sdif = tcp_v6_sdif(skb);
1047 		int l3index;
1048 
1049 		/*
1050 		 * The active side is gone. Try to find the listening socket
1051 		 * through the source port, and then find the md5 key through
1052 		 * the listening socket. We do not loosen security here:
1053 		 * the incoming packet is checked against the md5 hash of the
1054 		 * found key, and no RST is generated if the hash doesn't match.
1055 		 */
1056 		sk1 = inet6_lookup_listener(net,
1057 					   &tcp_hashinfo, NULL, 0,
1058 					   &ipv6h->saddr,
1059 					   th->source, &ipv6h->daddr,
1060 					   ntohs(th->source), dif, sdif);
1061 		if (!sk1)
1062 			goto out;
1063 
1064 		/* sdif set, means packet ingressed via a device
1065 		 * in an L3 domain and dif is set to it.
1066 		 */
1067 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1068 
1069 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1070 		if (!key)
1071 			goto out;
1072 
1073 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1074 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1075 			goto out;
1076 	}
1077 #endif
1078 
1079 	if (th->ack)
1080 		seq = ntohl(th->ack_seq);
1081 	else
1082 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1083 			  (th->doff << 2);
1084 
1085 	if (sk) {
1086 		oif = sk->sk_bound_dev_if;
1087 		if (sk_fullsock(sk)) {
1088 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1089 
1090 			trace_tcp_send_reset(sk, skb);
1091 			if (np->repflow)
1092 				label = ip6_flowlabel(ipv6h);
1093 			priority = sk->sk_priority;
1094 		}
1095 		if (sk->sk_state == TCP_TIME_WAIT) {
1096 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1097 			priority = inet_twsk(sk)->tw_priority;
1098 		}
1099 	} else {
1100 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1101 			label = ip6_flowlabel(ipv6h);
1102 	}
1103 
1104 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1105 			     ipv6_get_dsfield(ipv6h), label, priority);
1106 
1107 #ifdef CONFIG_TCP_MD5SIG
1108 out:
1109 	rcu_read_unlock();
1110 #endif
1111 }
1112 
1113 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1114 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1115 			    struct tcp_md5sig_key *key, u8 tclass,
1116 			    __be32 label, u32 priority)
1117 {
1118 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1119 			     tclass, label, priority);
1120 }
1121 
1122 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1123 {
1124 	struct inet_timewait_sock *tw = inet_twsk(sk);
1125 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1126 
1127 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1128 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1129 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1130 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1131 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1132 
1133 	inet_twsk_put(tw);
1134 }
1135 
1136 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1137 				  struct request_sock *req)
1138 {
1139 	int l3index;
1140 
1141 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1142 
1143 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1144 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1145 	 */
1146 	/* RFC 7323 2.3
1147 	 * The window field (SEG.WND) of every outgoing segment, with the
1148 	 * exception of <SYN> segments, MUST be right-shifted by
1149 	 * Rcv.Wind.Shift bits:
1150 	 */
1151 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1152 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1153 			tcp_rsk(req)->rcv_nxt,
1154 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1155 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1156 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1157 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1158 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1159 }
1160 
1161 
1162 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1163 {
1164 #ifdef CONFIG_SYN_COOKIES
1165 	const struct tcphdr *th = tcp_hdr(skb);
1166 
1167 	if (!th->syn)
1168 		sk = cookie_v6_check(sk, skb);
1169 #endif
1170 	return sk;
1171 }
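/* When syncookies are in use, a bare ACK arriving on a listener is handed
 * to cookie_v6_check() above, which tries to rebuild the request sock
 * from the cookie carried in the acknowledgment number and, if that
 * succeeds, returns the newly created child socket.
 */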
1172 
1173 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1174 			 struct tcphdr *th, u32 *cookie)
1175 {
1176 	u16 mss = 0;
1177 #ifdef CONFIG_SYN_COOKIES
1178 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1179 				    &tcp_request_sock_ipv6_ops, sk, th);
1180 	if (mss) {
1181 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1182 		tcp_synq_overflow(sk);
1183 	}
1184 #endif
1185 	return mss;
1186 }
1187 
1188 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1189 {
1190 	if (skb->protocol == htons(ETH_P_IP))
1191 		return tcp_v4_conn_request(sk, skb);
1192 
1193 	if (!ipv6_unicast_destination(skb))
1194 		goto drop;
1195 
1196 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1197 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1198 		return 0;
1199 	}
1200 
1201 	return tcp_conn_request(&tcp6_request_sock_ops,
1202 				&tcp_request_sock_ipv6_ops, sk, skb);
1203 
1204 drop:
1205 	tcp_listendrop(sk);
1206 	return 0; /* don't send reset */
1207 }
1208 
1209 static void tcp_v6_restore_cb(struct sk_buff *skb)
1210 {
1211 	/* We need to move header back to the beginning if xfrm6_policy_check()
1212 	 * and tcp_v6_fill_cb() are going to be called again.
1213 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1214 	 */
1215 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1216 		sizeof(struct inet6_skb_parm));
1217 }
1218 
1219 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1220 					 struct request_sock *req,
1221 					 struct dst_entry *dst,
1222 					 struct request_sock *req_unhash,
1223 					 bool *own_req)
1224 {
1225 	struct inet_request_sock *ireq;
1226 	struct ipv6_pinfo *newnp;
1227 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1228 	struct ipv6_txoptions *opt;
1229 	struct inet_sock *newinet;
1230 	bool found_dup_sk = false;
1231 	struct tcp_sock *newtp;
1232 	struct sock *newsk;
1233 #ifdef CONFIG_TCP_MD5SIG
1234 	struct tcp_md5sig_key *key;
1235 	int l3index;
1236 #endif
1237 	struct flowi6 fl6;
1238 
1239 	if (skb->protocol == htons(ETH_P_IP)) {
1240 		/*
1241 		 *	v6 mapped
1242 		 */
1243 
1244 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1245 					     req_unhash, own_req);
1246 
1247 		if (!newsk)
1248 			return NULL;
1249 
1250 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1251 
1252 		newinet = inet_sk(newsk);
1253 		newnp = tcp_inet6_sk(newsk);
1254 		newtp = tcp_sk(newsk);
1255 
1256 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1257 
1258 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1259 
1260 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1261 		if (sk_is_mptcp(newsk))
1262 			mptcpv6_handle_mapped(newsk, true);
1263 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1264 #ifdef CONFIG_TCP_MD5SIG
1265 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1266 #endif
1267 
1268 		newnp->ipv6_mc_list = NULL;
1269 		newnp->ipv6_ac_list = NULL;
1270 		newnp->ipv6_fl_list = NULL;
1271 		newnp->pktoptions  = NULL;
1272 		newnp->opt	   = NULL;
1273 		newnp->mcast_oif   = inet_iif(skb);
1274 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1275 		newnp->rcv_flowinfo = 0;
1276 		if (np->repflow)
1277 			newnp->flow_label = 0;
1278 
1279 		/*
1280 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1281 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1282 		 * that function for the gory details. -acme
1283 		 */
1284 
1285 		/* It is a tricky place. Until this moment IPv4 tcp
1286 		   worked with IPv6 icsk.icsk_af_ops.
1287 		   Sync it now.
1288 		 */
1289 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1290 
1291 		return newsk;
1292 	}
1293 
1294 	ireq = inet_rsk(req);
1295 
1296 	if (sk_acceptq_is_full(sk))
1297 		goto out_overflow;
1298 
1299 	if (!dst) {
1300 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1301 		if (!dst)
1302 			goto out;
1303 	}
1304 
1305 	newsk = tcp_create_openreq_child(sk, req, skb);
1306 	if (!newsk)
1307 		goto out_nonewsk;
1308 
1309 	/*
1310 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1311 	 * count here, tcp_create_openreq_child now does this for us, see the
1312 	 * comment in that function for the gory details. -acme
1313 	 */
1314 
1315 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1316 	ip6_dst_store(newsk, dst, NULL, NULL);
1317 	inet6_sk_rx_dst_set(newsk, skb);
1318 
1319 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1320 
1321 	newtp = tcp_sk(newsk);
1322 	newinet = inet_sk(newsk);
1323 	newnp = tcp_inet6_sk(newsk);
1324 
1325 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1326 
1327 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1328 	newnp->saddr = ireq->ir_v6_loc_addr;
1329 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1330 	newsk->sk_bound_dev_if = ireq->ir_iif;
1331 
1332 	/* Now IPv6 options...
1333 
1334 	   First: no IPv4 options.
1335 	 */
1336 	newinet->inet_opt = NULL;
1337 	newnp->ipv6_mc_list = NULL;
1338 	newnp->ipv6_ac_list = NULL;
1339 	newnp->ipv6_fl_list = NULL;
1340 
1341 	/* Clone RX bits */
1342 	newnp->rxopt.all = np->rxopt.all;
1343 
1344 	newnp->pktoptions = NULL;
1345 	newnp->opt	  = NULL;
1346 	newnp->mcast_oif  = tcp_v6_iif(skb);
1347 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1348 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1349 	if (np->repflow)
1350 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1351 
1352 	/* Set ToS of the new socket based upon the value of incoming SYN.
1353 	 * ECT bits are set later in tcp_init_transfer().
1354 	 */
1355 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1356 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1357 
1358 	/* Clone native IPv6 options from listening socket (if any)
1359 
1360 	   Yes, keeping reference count would be much more clever,
1361 	   but we do one more thing here: reattach optmem
1362 	   to newsk.
1363 	 */
1364 	opt = ireq->ipv6_opt;
1365 	if (!opt)
1366 		opt = rcu_dereference(np->opt);
1367 	if (opt) {
1368 		opt = ipv6_dup_options(newsk, opt);
1369 		RCU_INIT_POINTER(newnp->opt, opt);
1370 	}
1371 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1372 	if (opt)
1373 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1374 						    opt->opt_flen;
1375 
1376 	tcp_ca_openreq_child(newsk, dst);
1377 
1378 	tcp_sync_mss(newsk, dst_mtu(dst));
1379 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1380 
1381 	tcp_initialize_rcv_mss(newsk);
1382 
1383 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1384 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1385 
1386 #ifdef CONFIG_TCP_MD5SIG
1387 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1388 
1389 	/* Copy over the MD5 key from the original socket */
1390 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1391 	if (key) {
1392 		/* We're using one, so create a matching key
1393 		 * on the newsk structure. If we fail to get
1394 		 * memory, then we end up not copying the key
1395 		 * across. Shucks.
1396 		 */
1397 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1398 			       AF_INET6, 128, l3index, key->key, key->keylen,
1399 			       sk_gfp_mask(sk, GFP_ATOMIC));
1400 	}
1401 #endif
1402 
1403 	if (__inet_inherit_port(sk, newsk) < 0) {
1404 		inet_csk_prepare_forced_close(newsk);
1405 		tcp_done(newsk);
1406 		goto out;
1407 	}
1408 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1409 				       &found_dup_sk);
1410 	if (*own_req) {
1411 		tcp_move_syn(newtp, req);
1412 
1413 		/* Clone pktoptions received with SYN, if we own the req */
1414 		if (ireq->pktopts) {
1415 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1416 			consume_skb(ireq->pktopts);
1417 			ireq->pktopts = NULL;
1418 			if (newnp->pktoptions)
1419 				tcp_v6_restore_cb(newnp->pktoptions);
1420 		}
1421 	} else {
1422 		if (!req_unhash && found_dup_sk) {
1423 			/* This code path should only be executed in the
1424 			 * syncookie case
1425 			 */
1426 			bh_unlock_sock(newsk);
1427 			sock_put(newsk);
1428 			newsk = NULL;
1429 		}
1430 	}
1431 
1432 	return newsk;
1433 
1434 out_overflow:
1435 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1436 out_nonewsk:
1437 	dst_release(dst);
1438 out:
1439 	tcp_listendrop(sk);
1440 	return NULL;
1441 }
1442 
1443 /* The socket must have its spinlock held when we get
1444  * here, unless it is a TCP_LISTEN socket.
1445  *
1446  * We have a potential double-lock case here, so even when
1447  * doing backlog processing we use the BH locking scheme.
1448  * This is because we cannot sleep with the original spinlock
1449  * held.
1450  */
1451 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1452 {
1453 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1454 	struct sk_buff *opt_skb = NULL;
1455 	struct tcp_sock *tp;
1456 
1457 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1458 	   goes to IPv4 receive handler and backlogged.
1459 	   From backlog it always goes here. Kerboom...
1460 	   Fortunately, tcp_rcv_established and rcv_established
1461    handle them correctly, but it is not the case with
1462 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1463 	 */
1464 
1465 	if (skb->protocol == htons(ETH_P_IP))
1466 		return tcp_v4_do_rcv(sk, skb);
1467 
1468 	/*
1469 	 *	socket locking is here for SMP purposes as backlog rcv
1470 	 *	is currently called with bh processing disabled.
1471 	 */
1472 
1473 	/* Do Stevens' IPV6_PKTOPTIONS.
1474 
1475 	   Yes, guys, it is the only place in our code, where we
1476 	   may make it not affecting IPv4.
1477 	   The rest of code is protocol independent,
1478 	   and I do not like idea to uglify IPv4.
1479 
1480 	   Actually, all the idea behind IPV6_PKTOPTIONS
1481 	   looks not very well thought. For now we latch
1482 	   options, received in the last packet, enqueued
1483 	   by tcp. Feel free to propose better solution.
1484 					       --ANK (980728)
1485 	 */
1486 	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
1487 		opt_skb = skb_clone_and_charge_r(skb, sk);
1488 
1489 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1490 		struct dst_entry *dst;
1491 
1492 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1493 						lockdep_sock_is_held(sk));
1494 
1495 		sock_rps_save_rxhash(sk, skb);
1496 		sk_mark_napi_id(sk, skb);
1497 		if (dst) {
1498 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1499 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1500 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1501 				dst_release(dst);
1502 			}
1503 		}
1504 
1505 		tcp_rcv_established(sk, skb);
1506 		if (opt_skb)
1507 			goto ipv6_pktoptions;
1508 		return 0;
1509 	}
1510 
1511 	if (tcp_checksum_complete(skb))
1512 		goto csum_err;
1513 
1514 	if (sk->sk_state == TCP_LISTEN) {
1515 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1516 
1517 		if (!nsk)
1518 			goto discard;
1519 
1520 		if (nsk != sk) {
1521 			if (tcp_child_process(sk, nsk, skb))
1522 				goto reset;
1523 			return 0;
1524 		}
1525 	} else
1526 		sock_rps_save_rxhash(sk, skb);
1527 
1528 	if (tcp_rcv_state_process(sk, skb))
1529 		goto reset;
1530 	if (opt_skb)
1531 		goto ipv6_pktoptions;
1532 	return 0;
1533 
1534 reset:
1535 	tcp_v6_send_reset(sk, skb);
1536 discard:
1537 	if (opt_skb)
1538 		__kfree_skb(opt_skb);
1539 	kfree_skb(skb);
1540 	return 0;
1541 csum_err:
1542 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1543 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1544 	goto discard;
1545 
1546 
1547 ipv6_pktoptions:
1548 	/* Do you ask, what is it?
1549 
1550 	   1. skb was enqueued by tcp.
1551 	   2. skb is added to tail of read queue, rather than out of order.
1552 	   3. socket is not in passive state.
1553 	   4. Finally, it really contains options, which user wants to receive.
1554 	 */
1555 	tp = tcp_sk(sk);
1556 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1557 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1558 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1559 			np->mcast_oif = tcp_v6_iif(opt_skb);
1560 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1561 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1562 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1563 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1564 		if (np->repflow)
1565 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1566 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1567 			tcp_v6_restore_cb(opt_skb);
1568 			opt_skb = xchg(&np->pktoptions, opt_skb);
1569 		} else {
1570 			__kfree_skb(opt_skb);
1571 			opt_skb = xchg(&np->pktoptions, NULL);
1572 		}
1573 	}
1574 
1575 	kfree_skb(opt_skb);
1576 	return 0;
1577 }
1578 
1579 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1580 			   const struct tcphdr *th)
1581 {
1582 	/* This is tricky: we move IP6CB at its correct location into
1583 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1584 	 * _decode_session6() uses IP6CB().
1585 	 * barrier() makes sure compiler won't play aliasing games.
1586 	 */
1587 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1588 		sizeof(struct inet6_skb_parm));
1589 	barrier();
1590 
1591 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1592 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1593 				    skb->len - th->doff*4);
1594 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1595 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1596 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1597 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1598 	TCP_SKB_CB(skb)->sacked = 0;
1599 	TCP_SKB_CB(skb)->has_rxtstamp =
1600 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1601 }
1602 
1603 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1604 {
1605 	struct sk_buff *skb_to_free;
1606 	int sdif = inet6_sdif(skb);
1607 	int dif = inet6_iif(skb);
1608 	const struct tcphdr *th;
1609 	const struct ipv6hdr *hdr;
1610 	bool refcounted;
1611 	struct sock *sk;
1612 	int ret;
1613 	struct net *net = dev_net(skb->dev);
1614 
1615 	if (skb->pkt_type != PACKET_HOST)
1616 		goto discard_it;
1617 
1618 	/*
1619 	 *	Count it even if it's bad.
1620 	 */
1621 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1622 
1623 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1624 		goto discard_it;
1625 
1626 	th = (const struct tcphdr *)skb->data;
1627 
1628 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1629 		goto bad_packet;
1630 	if (!pskb_may_pull(skb, th->doff*4))
1631 		goto discard_it;
1632 
1633 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1634 		goto csum_error;
1635 
1636 	th = (const struct tcphdr *)skb->data;
1637 	hdr = ipv6_hdr(skb);
1638 
1639 lookup:
1640 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1641 				th->source, th->dest, inet6_iif(skb), sdif,
1642 				&refcounted);
1643 	if (!sk)
1644 		goto no_tcp_socket;
1645 
1646 process:
1647 	if (sk->sk_state == TCP_TIME_WAIT)
1648 		goto do_time_wait;
1649 
1650 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1651 		struct request_sock *req = inet_reqsk(sk);
1652 		bool req_stolen = false;
1653 		struct sock *nsk;
1654 
1655 		sk = req->rsk_listener;
1656 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1657 			sk_drops_add(sk, skb);
1658 			reqsk_put(req);
1659 			goto discard_it;
1660 		}
1661 		if (tcp_checksum_complete(skb)) {
1662 			reqsk_put(req);
1663 			goto csum_error;
1664 		}
1665 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1666 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1667 			goto lookup;
1668 		}
1669 		sock_hold(sk);
1670 		refcounted = true;
1671 		nsk = NULL;
1672 		if (!tcp_filter(sk, skb)) {
1673 			th = (const struct tcphdr *)skb->data;
1674 			hdr = ipv6_hdr(skb);
1675 			tcp_v6_fill_cb(skb, hdr, th);
1676 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1677 		}
1678 		if (!nsk) {
1679 			reqsk_put(req);
1680 			if (req_stolen) {
1681 				/* Another cpu got exclusive access to req
1682 				 * and created a full blown socket.
1683 				 * Try to feed this packet to this socket
1684 				 * instead of discarding it.
1685 				 */
1686 				tcp_v6_restore_cb(skb);
1687 				sock_put(sk);
1688 				goto lookup;
1689 			}
1690 			goto discard_and_relse;
1691 		}
1692 		if (nsk == sk) {
1693 			reqsk_put(req);
1694 			tcp_v6_restore_cb(skb);
1695 		} else if (tcp_child_process(sk, nsk, skb)) {
1696 			tcp_v6_send_reset(nsk, skb);
1697 			goto discard_and_relse;
1698 		} else {
1699 			sock_put(sk);
1700 			return 0;
1701 		}
1702 	}
1703 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1704 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1705 		goto discard_and_relse;
1706 	}
1707 
1708 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1709 		goto discard_and_relse;
1710 
1711 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1712 		goto discard_and_relse;
1713 
1714 	if (tcp_filter(sk, skb))
1715 		goto discard_and_relse;
1716 	th = (const struct tcphdr *)skb->data;
1717 	hdr = ipv6_hdr(skb);
1718 	tcp_v6_fill_cb(skb, hdr, th);
1719 
1720 	skb->dev = NULL;
1721 
1722 	if (sk->sk_state == TCP_LISTEN) {
1723 		ret = tcp_v6_do_rcv(sk, skb);
1724 		goto put_and_return;
1725 	}
1726 
1727 	sk_incoming_cpu_update(sk);
1728 
1729 	bh_lock_sock_nested(sk);
1730 	tcp_segs_in(tcp_sk(sk), skb);
1731 	ret = 0;
1732 	if (!sock_owned_by_user(sk)) {
1733 		skb_to_free = sk->sk_rx_skb_cache;
1734 		sk->sk_rx_skb_cache = NULL;
1735 		ret = tcp_v6_do_rcv(sk, skb);
1736 	} else {
1737 		if (tcp_add_backlog(sk, skb))
1738 			goto discard_and_relse;
1739 		skb_to_free = NULL;
1740 	}
1741 	bh_unlock_sock(sk);
1742 	if (skb_to_free)
1743 		__kfree_skb(skb_to_free);
1744 put_and_return:
1745 	if (refcounted)
1746 		sock_put(sk);
1747 	return ret ? -1 : 0;
1748 
1749 no_tcp_socket:
1750 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1751 		goto discard_it;
1752 
1753 	tcp_v6_fill_cb(skb, hdr, th);
1754 
1755 	if (tcp_checksum_complete(skb)) {
1756 csum_error:
1757 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1758 bad_packet:
1759 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1760 	} else {
1761 		tcp_v6_send_reset(NULL, skb);
1762 	}
1763 
1764 discard_it:
1765 	kfree_skb(skb);
1766 	return 0;
1767 
1768 discard_and_relse:
1769 	sk_drops_add(sk, skb);
1770 	if (refcounted)
1771 		sock_put(sk);
1772 	goto discard_it;
1773 
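	/* TIME_WAIT socket: tcp_timewait_state_process() decides whether to
	 * accept a new SYN via a live listener, ACK, send a reset, or
	 * silently drop the segment.
	 */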
1774 do_time_wait:
1775 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1776 		inet_twsk_put(inet_twsk(sk));
1777 		goto discard_it;
1778 	}
1779 
1780 	tcp_v6_fill_cb(skb, hdr, th);
1781 
1782 	if (tcp_checksum_complete(skb)) {
1783 		inet_twsk_put(inet_twsk(sk));
1784 		goto csum_error;
1785 	}
1786 
1787 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1788 	case TCP_TW_SYN:
1789 	{
1790 		struct sock *sk2;
1791 
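		/* A new SYN hit this TIME_WAIT socket: if a listener still
		 * exists, retire the timewait socket and process the SYN
		 * against the listener instead.
		 */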
1792 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1793 					    skb, __tcp_hdrlen(th),
1794 					    &ipv6_hdr(skb)->saddr, th->source,
1795 					    &ipv6_hdr(skb)->daddr,
1796 					    ntohs(th->dest),
1797 					    tcp_v6_iif_l3_slave(skb),
1798 					    sdif);
1799 		if (sk2) {
1800 			struct inet_timewait_sock *tw = inet_twsk(sk);
1801 			inet_twsk_deschedule_put(tw);
1802 			sk = sk2;
1803 			tcp_v6_restore_cb(skb);
1804 			refcounted = false;
1805 			goto process;
1806 		}
1807 	}
1808 		/* to ACK */
1809 		fallthrough;
1810 	case TCP_TW_ACK:
1811 		tcp_v6_timewait_ack(sk, skb);
1812 		break;
1813 	case TCP_TW_RST:
1814 		tcp_v6_send_reset(sk, skb);
1815 		inet_twsk_deschedule_put(inet_twsk(sk));
1816 		goto discard_it;
1817 	case TCP_TW_SUCCESS:
1818 		;
1819 	}
1820 	goto discard_it;
1821 }
1822 
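/* Early demux: look up the established socket at IP receive time so that
 * tcp_v6_rcv() can reuse the cached socket and its validated rx dst.
 */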
1823 void tcp_v6_early_demux(struct sk_buff *skb)
1824 {
1825 	const struct ipv6hdr *hdr;
1826 	const struct tcphdr *th;
1827 	struct sock *sk;
1828 
1829 	if (skb->pkt_type != PACKET_HOST)
1830 		return;
1831 
1832 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1833 		return;
1834 
1835 	hdr = ipv6_hdr(skb);
1836 	th = tcp_hdr(skb);
1837 
1838 	if (th->doff < sizeof(struct tcphdr) / 4)
1839 		return;
1840 
1841 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1842 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1843 					&hdr->saddr, th->source,
1844 					&hdr->daddr, ntohs(th->dest),
1845 					inet6_iif(skb), inet6_sdif(skb));
1846 	if (sk) {
1847 		skb->sk = sk;
1848 		skb->destructor = sock_edemux;
1849 		if (sk_fullsock(sk)) {
1850 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1851 
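			/* Reuse the cached rx dst only if it is still valid
			 * for its cookie and was learned on the incoming
			 * interface.
			 */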
1852 			if (dst)
1853 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1854 			if (dst &&
1855 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1856 				skb_dst_set_noref(skb, dst);
1857 		}
1858 	}
1859 }
1860 
1861 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1862 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1863 	.twsk_unique	= tcp_twsk_unique,
1864 	.twsk_destructor = tcp_twsk_destructor,
1865 };
1866 
1867 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1868 {
1869 	struct ipv6_pinfo *np = inet6_sk(sk);
1870 
1871 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1872 }
1873 
1874 const struct inet_connection_sock_af_ops ipv6_specific = {
1875 	.queue_xmit	   = inet6_csk_xmit,
1876 	.send_check	   = tcp_v6_send_check,
1877 	.rebuild_header	   = inet6_sk_rebuild_header,
1878 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1879 	.conn_request	   = tcp_v6_conn_request,
1880 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1881 	.net_header_len	   = sizeof(struct ipv6hdr),
1882 	.net_frag_header_len = sizeof(struct frag_hdr),
1883 	.setsockopt	   = ipv6_setsockopt,
1884 	.getsockopt	   = ipv6_getsockopt,
1885 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1886 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1887 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1888 };
1889 
1890 #ifdef CONFIG_TCP_MD5SIG
1891 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1892 	.md5_lookup	=	tcp_v6_md5_lookup,
1893 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1894 	.md5_parse	=	tcp_v6_parse_md5_keys,
1895 };
1896 #endif
1897 
1898 /*
1899  *	TCP over IPv4 via INET6 API
1900  */
1901 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1902 	.queue_xmit	   = ip_queue_xmit,
1903 	.send_check	   = tcp_v4_send_check,
1904 	.rebuild_header	   = inet_sk_rebuild_header,
1905 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1906 	.conn_request	   = tcp_v6_conn_request,
1907 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1908 	.net_header_len	   = sizeof(struct iphdr),
1909 	.setsockopt	   = ipv6_setsockopt,
1910 	.getsockopt	   = ipv6_getsockopt,
1911 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1912 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1913 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1914 };
1915 
1916 #ifdef CONFIG_TCP_MD5SIG
1917 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1918 	.md5_lookup	=	tcp_v4_md5_lookup,
1919 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1920 	.md5_parse	=	tcp_v6_parse_md5_keys,
1921 };
1922 #endif
1923 
1924 /* NOTE: A lot of fields are set to zero explicitly by the call to
1925  *       sk_alloc(), so they need not be initialized here.
1926  */
1927 static int tcp_v6_init_sock(struct sock *sk)
1928 {
1929 	struct inet_connection_sock *icsk = inet_csk(sk);
1930 
1931 	tcp_init_sock(sk);
1932 
1933 	icsk->icsk_af_ops = &ipv6_specific;
1934 
1935 #ifdef CONFIG_TCP_MD5SIG
1936 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1937 #endif
1938 
1939 	return 0;
1940 }
1941 
1942 #ifdef CONFIG_PROC_FS
1943 /* Proc filesystem TCPv6 sock list dumping. */
1944 static void get_openreq6(struct seq_file *seq,
1945 			 const struct request_sock *req, int i)
1946 {
1947 	long ttd = req->rsk_timer.expires - jiffies;
1948 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1949 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1950 
1951 	if (ttd < 0)
1952 		ttd = 0;
1953 
1954 	seq_printf(seq,
1955 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1956 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1957 		   i,
1958 		   src->s6_addr32[0], src->s6_addr32[1],
1959 		   src->s6_addr32[2], src->s6_addr32[3],
1960 		   inet_rsk(req)->ir_num,
1961 		   dest->s6_addr32[0], dest->s6_addr32[1],
1962 		   dest->s6_addr32[2], dest->s6_addr32[3],
1963 		   ntohs(inet_rsk(req)->ir_rmt_port),
1964 		   TCP_SYN_RECV,
1965 		   0, 0, /* could print option size, but that is af dependent. */
1966 		   1,   /* timers active (only the expire timer) */
1967 		   jiffies_to_clock_t(ttd),
1968 		   req->num_timeout,
1969 		   from_kuid_munged(seq_user_ns(seq),
1970 				    sock_i_uid(req->rsk_listener)),
1971 		   0,  /* non standard timer */
1972 		   0, /* open_requests have no inode */
1973 		   0, req);
1974 }
1975 
1976 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1977 {
1978 	const struct in6_addr *dest, *src;
1979 	__u16 destp, srcp;
1980 	int timer_active;
1981 	unsigned long timer_expires;
1982 	const struct inet_sock *inet = inet_sk(sp);
1983 	const struct tcp_sock *tp = tcp_sk(sp);
1984 	const struct inet_connection_sock *icsk = inet_csk(sp);
1985 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1986 	int rx_queue;
1987 	int state;
1988 
1989 	dest  = &sp->sk_v6_daddr;
1990 	src   = &sp->sk_v6_rcv_saddr;
1991 	destp = ntohs(inet->inet_dport);
1992 	srcp  = ntohs(inet->inet_sport);
1993 
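	/* Encode the pending timer for the "tr" column of /proc/net/tcp6:
	 * 1 = retransmit/loss probe, 2 = keepalive (sk_timer),
	 * 4 = zero window probe, 0 = none.
	 */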
1994 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1995 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1996 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1997 		timer_active	= 1;
1998 		timer_expires	= icsk->icsk_timeout;
1999 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2000 		timer_active	= 4;
2001 		timer_expires	= icsk->icsk_timeout;
2002 	} else if (timer_pending(&sp->sk_timer)) {
2003 		timer_active	= 2;
2004 		timer_expires	= sp->sk_timer.expires;
2005 	} else {
2006 		timer_active	= 0;
2007 		timer_expires = jiffies;
2008 	}
2009 
2010 	state = inet_sk_state_load(sp);
2011 	if (state == TCP_LISTEN)
2012 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2013 	else
2014 		/* Because we don't lock the socket,
2015 		 * we might find a transient negative value.
2016 		 */
2017 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2018 				      READ_ONCE(tp->copied_seq), 0);
2019 
2020 	seq_printf(seq,
2021 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2022 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2023 		   i,
2024 		   src->s6_addr32[0], src->s6_addr32[1],
2025 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2026 		   dest->s6_addr32[0], dest->s6_addr32[1],
2027 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2028 		   state,
2029 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2030 		   rx_queue,
2031 		   timer_active,
2032 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2033 		   icsk->icsk_retransmits,
2034 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2035 		   icsk->icsk_probes_out,
2036 		   sock_i_ino(sp),
2037 		   refcount_read(&sp->sk_refcnt), sp,
2038 		   jiffies_to_clock_t(icsk->icsk_rto),
2039 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2040 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2041 		   tp->snd_cwnd,
2042 		   state == TCP_LISTEN ?
2043 			fastopenq->max_qlen :
2044 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2045 		   );
2046 }
2047 
2048 static void get_timewait6_sock(struct seq_file *seq,
2049 			       struct inet_timewait_sock *tw, int i)
2050 {
2051 	long delta = tw->tw_timer.expires - jiffies;
2052 	const struct in6_addr *dest, *src;
2053 	__u16 destp, srcp;
2054 
2055 	dest = &tw->tw_v6_daddr;
2056 	src  = &tw->tw_v6_rcv_saddr;
2057 	destp = ntohs(tw->tw_dport);
2058 	srcp  = ntohs(tw->tw_sport);
2059 
2060 	seq_printf(seq,
2061 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2063 		   i,
2064 		   src->s6_addr32[0], src->s6_addr32[1],
2065 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2066 		   dest->s6_addr32[0], dest->s6_addr32[1],
2067 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2068 		   tw->tw_substate, 0, 0,
2069 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2070 		   refcount_read(&tw->tw_refcnt), tw);
2071 }
2072 
2073 static int tcp6_seq_show(struct seq_file *seq, void *v)
2074 {
2075 	struct tcp_iter_state *st;
2076 	struct sock *sk = v;
2077 
2078 	if (v == SEQ_START_TOKEN) {
2079 		seq_puts(seq,
2080 			 "  sl  "
2081 			 "local_address                         "
2082 			 "remote_address                        "
2083 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2084 			 "   uid  timeout inode\n");
2085 		goto out;
2086 	}
2087 	st = seq->private;
2088 
2089 	if (sk->sk_state == TCP_TIME_WAIT)
2090 		get_timewait6_sock(seq, v, st->num);
2091 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2092 		get_openreq6(seq, v, st->num);
2093 	else
2094 		get_tcp6_sock(seq, v, st->num);
2095 out:
2096 	return 0;
2097 }
2098 
2099 static const struct seq_operations tcp6_seq_ops = {
2100 	.show		= tcp6_seq_show,
2101 	.start		= tcp_seq_start,
2102 	.next		= tcp_seq_next,
2103 	.stop		= tcp_seq_stop,
2104 };
2105 
2106 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2107 	.family		= AF_INET6,
2108 };
2109 
2110 int __net_init tcp6_proc_init(struct net *net)
2111 {
2112 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2113 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2114 		return -ENOMEM;
2115 	return 0;
2116 }
2117 
2118 void tcp6_proc_exit(struct net *net)
2119 {
2120 	remove_proc_entry("tcp6", net->proc_net);
2121 }
2122 #endif
2123 
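/* tcpv6_prot wires the protocol-independent TCP implementation into the
 * IPv6 socket layer; socket teardown is shared with IPv4 through
 * tcp_v4_destroy_sock().
 */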
2124 struct proto tcpv6_prot = {
2125 	.name			= "TCPv6",
2126 	.owner			= THIS_MODULE,
2127 	.close			= tcp_close,
2128 	.pre_connect		= tcp_v6_pre_connect,
2129 	.connect		= tcp_v6_connect,
2130 	.disconnect		= tcp_disconnect,
2131 	.accept			= inet_csk_accept,
2132 	.ioctl			= tcp_ioctl,
2133 	.init			= tcp_v6_init_sock,
2134 	.destroy		= tcp_v4_destroy_sock,
2135 	.shutdown		= tcp_shutdown,
2136 	.setsockopt		= tcp_setsockopt,
2137 	.getsockopt		= tcp_getsockopt,
2138 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2139 	.keepalive		= tcp_set_keepalive,
2140 	.recvmsg		= tcp_recvmsg,
2141 	.sendmsg		= tcp_sendmsg,
2142 	.sendpage		= tcp_sendpage,
2143 	.backlog_rcv		= tcp_v6_do_rcv,
2144 	.release_cb		= tcp_release_cb,
2145 	.hash			= inet6_hash,
2146 	.unhash			= inet_unhash,
2147 	.get_port		= inet_csk_get_port,
2148 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2149 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2150 	.stream_memory_free	= tcp_stream_memory_free,
2151 	.sockets_allocated	= &tcp_sockets_allocated,
2152 	.memory_allocated	= &tcp_memory_allocated,
2153 	.memory_pressure	= &tcp_memory_pressure,
2154 	.orphan_count		= &tcp_orphan_count,
2155 	.sysctl_mem		= sysctl_tcp_mem,
2156 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2157 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2158 	.max_header		= MAX_TCP_HEADER,
2159 	.obj_size		= sizeof(struct tcp6_sock),
2160 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2161 	.twsk_prot		= &tcp6_timewait_sock_ops,
2162 	.rsk_prot		= &tcp6_request_sock_ops,
2163 	.h.hashinfo		= &tcp_hashinfo,
2164 	.no_autobind		= true,
2165 	.diag_destroy		= tcp_abort,
2166 };
2167 EXPORT_SYMBOL_GPL(tcpv6_prot);
2168 
2169 static const struct inet6_protocol tcpv6_protocol = {
2170 	.handler	=	tcp_v6_rcv,
2171 	.err_handler	=	tcp_v6_err,
2172 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2173 };
2174 
2175 static struct inet_protosw tcpv6_protosw = {
2176 	.type		=	SOCK_STREAM,
2177 	.protocol	=	IPPROTO_TCP,
2178 	.prot		=	&tcpv6_prot,
2179 	.ops		=	&inet6_stream_ops,
2180 	.flags		=	INET_PROTOSW_PERMANENT |
2181 				INET_PROTOSW_ICSK,
2182 };
2183 
2184 static int __net_init tcpv6_net_init(struct net *net)
2185 {
2186 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2187 				    SOCK_RAW, IPPROTO_TCP, net);
2188 }
2189 
2190 static void __net_exit tcpv6_net_exit(struct net *net)
2191 {
2192 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2193 }
2194 
2195 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2196 {
2197 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2198 }
2199 
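/* Per-netns setup: the control socket created here is used by the
 * tcp_v6_send_reset()/tcp_v6_send_ack() paths to emit segments on behalf
 * of sockets we do not own; the batched exit purges remaining IPv6
 * timewait sockets when namespaces are torn down.
 */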
2200 static struct pernet_operations tcpv6_net_ops = {
2201 	.init	    = tcpv6_net_init,
2202 	.exit	    = tcpv6_net_exit,
2203 	.exit_batch = tcpv6_net_exit_batch,
2204 };
2205 
2206 int __init tcpv6_init(void)
2207 {
2208 	int ret;
2209 
2210 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2211 	if (ret)
2212 		goto out;
2213 
2214 	/* register inet6 protocol */
2215 	ret = inet6_register_protosw(&tcpv6_protosw);
2216 	if (ret)
2217 		goto out_tcpv6_protocol;
2218 
2219 	ret = register_pernet_subsys(&tcpv6_net_ops);
2220 	if (ret)
2221 		goto out_tcpv6_protosw;
2222 
2223 	ret = mptcpv6_init();
2224 	if (ret)
2225 		goto out_tcpv6_pernet_subsys;
2226 
2227 out:
2228 	return ret;
2229 
2230 out_tcpv6_pernet_subsys:
2231 	unregister_pernet_subsys(&tcpv6_net_ops);
2232 out_tcpv6_protosw:
2233 	inet6_unregister_protosw(&tcpv6_protosw);
2234 out_tcpv6_protocol:
2235 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2236 	goto out;
2237 }
2238 
2239 void tcpv6_exit(void)
2240 {
2241 	unregister_pernet_subsys(&tcpv6_net_ops);
2242 	inet6_unregister_protosw(&tcpv6_protosw);
2243 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2244 }
2245