// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
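/* Why the offset arithmetic above works: a minimal sketch of the layout it
 * assumes (see the real definition of struct tcp6_sock in linux/ipv6.h):
 *
 *	struct tcp6_sock {
 *		struct tcp_sock	  tcp;
 *		struct ipv6_pinfo inet6;	// last member
 *	};
 *
 * The pinfo therefore sits at a compile-time constant offset from the
 * socket, so the compiler can fold the address computation instead of
 * loading the inet_sk(sk)->pinet6 pointer.
 */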

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * outside the bounds specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

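	/* SIN6_LEN_RFC2133 (24 bytes) is the RFC 2133 sockaddr_in6 layout,
	 * i.e. today's struct sockaddr_in6 without the trailing
	 * sin6_scope_id, so the shortest historically valid address length
	 * is still accepted here.
	 */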
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

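		/* A v4-mapped destination looks like ::ffff:a.b.c.d; for
		 * example ::ffff:192.0.2.1 carries the IPv4 address in
		 * s6_addr32[3], which is copied into the sockaddr_in below.
		 */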
		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * The check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}
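	/* For example, absent TCP_MD5SIG_FLAG_PREFIX, a key added for the
	 * v4-mapped peer ::ffff:198.51.100.7 defaults to a /32 match, while
	 * a native IPv6 peer defaults to a /128 (exact-address) match.
	 */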

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
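	/* Per RFC 2460 section 8.1 the pseudo-header covers the source and
	 * destination addresses, the upper-layer packet length and the
	 * next-header (protocol) value, all in network byte order.
	 */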
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when the reply is a RST;
	 * the underlying function uses it to retrieve the network namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is checked with the md5 hash of the
		 * found key, and no RST is generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source),
					   tcp_v6_iif_l3_slave(skb),
					   tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
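	/* For example, with req->rsk_rcv_wnd == 65535 and rcv_wscale == 7,
	 * the window advertised below becomes 65535 >> 7 == 511.
	 */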
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 tcp code
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should be executed only in
			 * the syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   may make it not affect IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   the options received in the last packet enqueued
	   by tcp. Feel free to propose a better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* You may ask, what is this?

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
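	/* end_seq thus counts one sequence number each for SYN and FIN plus
	 * the payload length; e.g. a bare SYN yields end_seq == seq + 1.
	 */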
1503 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1504 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1505 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1506 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1507 	TCP_SKB_CB(skb)->sacked = 0;
1508 	TCP_SKB_CB(skb)->has_rxtstamp =
1509 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1510 }
1511 
tcp_v6_rcv(struct sk_buff * skb)1512 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1513 {
1514 	struct sk_buff *skb_to_free;
1515 	int sdif = inet6_sdif(skb);
1516 	const struct tcphdr *th;
1517 	const struct ipv6hdr *hdr;
1518 	bool refcounted;
1519 	struct sock *sk;
1520 	int ret;
1521 	struct net *net = dev_net(skb->dev);
1522 
1523 	if (skb->pkt_type != PACKET_HOST)
1524 		goto discard_it;
1525 
1526 	/*
1527 	 *	Count it even if it's bad.
1528 	 */
1529 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1530 
1531 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1532 		goto discard_it;
1533 
1534 	th = (const struct tcphdr *)skb->data;
1535 
1536 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1537 		goto bad_packet;
1538 	if (!pskb_may_pull(skb, th->doff*4))
1539 		goto discard_it;
1540 
1541 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1542 		goto csum_error;
1543 
1544 	th = (const struct tcphdr *)skb->data;
1545 	hdr = ipv6_hdr(skb);
1546 
1547 lookup:
1548 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1549 				th->source, th->dest, inet6_iif(skb), sdif,
1550 				&refcounted);
1551 	if (!sk)
1552 		goto no_tcp_socket;
1553 
1554 process:
1555 	if (sk->sk_state == TCP_TIME_WAIT)
1556 		goto do_time_wait;
1557 
1558 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1559 		struct request_sock *req = inet_reqsk(sk);
1560 		bool req_stolen = false;
1561 		struct sock *nsk;
1562 
1563 		sk = req->rsk_listener;
1564 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
1565 			sk_drops_add(sk, skb);
1566 			reqsk_put(req);
1567 			goto discard_it;
1568 		}
1569 		if (tcp_checksum_complete(skb)) {
1570 			reqsk_put(req);
1571 			goto csum_error;
1572 		}
1573 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1574 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1575 			goto lookup;
1576 		}
1577 		sock_hold(sk);
1578 		refcounted = true;
1579 		nsk = NULL;
1580 		if (!tcp_filter(sk, skb)) {
1581 			th = (const struct tcphdr *)skb->data;
1582 			hdr = ipv6_hdr(skb);
1583 			tcp_v6_fill_cb(skb, hdr, th);
1584 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1585 		}
1586 		if (!nsk) {
1587 			reqsk_put(req);
1588 			if (req_stolen) {
1589 				/* Another cpu got exclusive access to req
1590 				 * and created a full blown socket.
1591 				 * Try to feed this packet to this socket
1592 				 * instead of discarding it.
1593 				 */
1594 				tcp_v6_restore_cb(skb);
1595 				sock_put(sk);
1596 				goto lookup;
1597 			}
1598 			goto discard_and_relse;
1599 		}
1600 		if (nsk == sk) {
1601 			reqsk_put(req);
1602 			tcp_v6_restore_cb(skb);
1603 		} else if (tcp_child_process(sk, nsk, skb)) {
1604 			tcp_v6_send_reset(nsk, skb);
1605 			goto discard_and_relse;
1606 		} else {
1607 			sock_put(sk);
1608 			return 0;
1609 		}
1610 	}
1611 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1612 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1613 		goto discard_and_relse;
1614 	}
1615 
1616 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1617 		goto discard_and_relse;
1618 
1619 	if (tcp_v6_inbound_md5_hash(sk, skb))
1620 		goto discard_and_relse;
1621 
1622 	if (tcp_filter(sk, skb))
1623 		goto discard_and_relse;
1624 	th = (const struct tcphdr *)skb->data;
1625 	hdr = ipv6_hdr(skb);
1626 	tcp_v6_fill_cb(skb, hdr, th);
1627 
1628 	skb->dev = NULL;
1629 
1630 	if (sk->sk_state == TCP_LISTEN) {
1631 		ret = tcp_v6_do_rcv(sk, skb);
1632 		goto put_and_return;
1633 	}
1634 
1635 	sk_incoming_cpu_update(sk);
1636 
1637 	bh_lock_sock_nested(sk);
1638 	tcp_segs_in(tcp_sk(sk), skb);
1639 	ret = 0;
1640 	if (!sock_owned_by_user(sk)) {
1641 		skb_to_free = sk->sk_rx_skb_cache;
1642 		sk->sk_rx_skb_cache = NULL;
1643 		ret = tcp_v6_do_rcv(sk, skb);
1644 	} else {
1645 		if (tcp_add_backlog(sk, skb))
1646 			goto discard_and_relse;
1647 		skb_to_free = NULL;
1648 	}
1649 	bh_unlock_sock(sk);
1650 	if (skb_to_free)
1651 		__kfree_skb(skb_to_free);
1652 put_and_return:
1653 	if (refcounted)
1654 		sock_put(sk);
1655 	return ret ? -1 : 0;
1656 
1657 no_tcp_socket:
1658 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1659 		goto discard_it;
1660 
1661 	tcp_v6_fill_cb(skb, hdr, th);
1662 
1663 	if (tcp_checksum_complete(skb)) {
1664 csum_error:
1665 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1666 bad_packet:
1667 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1668 	} else {
1669 		tcp_v6_send_reset(NULL, skb);
1670 	}
1671 
1672 discard_it:
1673 	kfree_skb(skb);
1674 	return 0;
1675 
1676 discard_and_relse:
1677 	sk_drops_add(sk, skb);
1678 	if (refcounted)
1679 		sock_put(sk);
1680 	goto discard_it;
1681 
1682 do_time_wait:
1683 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1684 		inet_twsk_put(inet_twsk(sk));
1685 		goto discard_it;
1686 	}
1687 
1688 	tcp_v6_fill_cb(skb, hdr, th);
1689 
1690 	if (tcp_checksum_complete(skb)) {
1691 		inet_twsk_put(inet_twsk(sk));
1692 		goto csum_error;
1693 	}
1694 
1695 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1696 	case TCP_TW_SYN:
1697 	{
1698 		struct sock *sk2;
1699 
1700 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1701 					    skb, __tcp_hdrlen(th),
1702 					    &ipv6_hdr(skb)->saddr, th->source,
1703 					    &ipv6_hdr(skb)->daddr,
1704 					    ntohs(th->dest),
1705 					    tcp_v6_iif_l3_slave(skb),
1706 					    sdif);
1707 		if (sk2) {
1708 			struct inet_timewait_sock *tw = inet_twsk(sk);
1709 			inet_twsk_deschedule_put(tw);
1710 			sk = sk2;
1711 			tcp_v6_restore_cb(skb);
1712 			refcounted = false;
1713 			goto process;
1714 		}
1715 	}
1716 		/* to ACK */
1717 		/* fall through */
1718 	case TCP_TW_ACK:
1719 		tcp_v6_timewait_ack(sk, skb);
1720 		break;
1721 	case TCP_TW_RST:
1722 		tcp_v6_send_reset(sk, skb);
1723 		inet_twsk_deschedule_put(inet_twsk(sk));
1724 		goto discard_it;
1725 	case TCP_TW_SUCCESS:
1726 		;
1727 	}
1728 	goto discard_it;
1729 }
1730 
tcp_v6_early_demux(struct sk_buff * skb)1731 void tcp_v6_early_demux(struct sk_buff *skb)
1732 {
1733 	const struct ipv6hdr *hdr;
1734 	const struct tcphdr *th;
1735 	struct sock *sk;
1736 
1737 	if (skb->pkt_type != PACKET_HOST)
1738 		return;
1739 
1740 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1741 		return;
1742 
1743 	hdr = ipv6_hdr(skb);
1744 	th = tcp_hdr(skb);
1745 
1746 	if (th->doff < sizeof(struct tcphdr) / 4)
1747 		return;
1748 
1749 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1750 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1751 					&hdr->saddr, th->source,
1752 					&hdr->daddr, ntohs(th->dest),
1753 					inet6_iif(skb), inet6_sdif(skb));
1754 	if (sk) {
1755 		skb->sk = sk;
1756 		skb->destructor = sock_edemux;
1757 		if (sk_fullsock(sk)) {
1758 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1759 
1760 			if (dst)
1761 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1762 			if (dst &&
1763 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1764 				skb_dst_set_noref(skb, dst);
1765 		}
1766 	}
1767 }
1768 
1769 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1770 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1771 	.twsk_unique	= tcp_twsk_unique,
1772 	.twsk_destructor = tcp_twsk_destructor,
1773 };
1774 
1775 static const struct inet_connection_sock_af_ops ipv6_specific = {
1776 	.queue_xmit	   = inet6_csk_xmit,
1777 	.send_check	   = tcp_v6_send_check,
1778 	.rebuild_header	   = inet6_sk_rebuild_header,
1779 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1780 	.conn_request	   = tcp_v6_conn_request,
1781 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1782 	.net_header_len	   = sizeof(struct ipv6hdr),
1783 	.net_frag_header_len = sizeof(struct frag_hdr),
1784 	.setsockopt	   = ipv6_setsockopt,
1785 	.getsockopt	   = ipv6_getsockopt,
1786 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1787 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1788 #ifdef CONFIG_COMPAT
1789 	.compat_setsockopt = compat_ipv6_setsockopt,
1790 	.compat_getsockopt = compat_ipv6_getsockopt,
1791 #endif
1792 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1793 };
1794 
1795 #ifdef CONFIG_TCP_MD5SIG
1796 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1797 	.md5_lookup	=	tcp_v6_md5_lookup,
1798 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1799 	.md5_parse	=	tcp_v6_parse_md5_keys,
1800 };
1801 #endif
1802 
1803 /*
1804  *	TCP over IPv4 via INET6 API
1805  */
1806 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1807 	.queue_xmit	   = ip_queue_xmit,
1808 	.send_check	   = tcp_v4_send_check,
1809 	.rebuild_header	   = inet_sk_rebuild_header,
1810 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1811 	.conn_request	   = tcp_v6_conn_request,
1812 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1813 	.net_header_len	   = sizeof(struct iphdr),
1814 	.setsockopt	   = ipv6_setsockopt,
1815 	.getsockopt	   = ipv6_getsockopt,
1816 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1817 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1818 #ifdef CONFIG_COMPAT
1819 	.compat_setsockopt = compat_ipv6_setsockopt,
1820 	.compat_getsockopt = compat_ipv6_getsockopt,
1821 #endif
1822 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1823 };
1824 
1825 #ifdef CONFIG_TCP_MD5SIG
1826 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1827 	.md5_lookup	=	tcp_v4_md5_lookup,
1828 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1829 	.md5_parse	=	tcp_v6_parse_md5_keys,
1830 };
1831 #endif
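/*
 * Illustrative userspace sketch (not kernel code): the ipv6_mapped ops
 * above come into play when an AF_INET6 TCP socket connects to a
 * v4-mapped address; tcp_v6_connect() then replaces icsk_af_ops (and the
 * MD5 af_specific ops) with the mapped variants so the connection runs
 * over IPv4. The address and port below are placeholders.
 */
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

static int connect_v4_mapped(void)
{
	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
				   .sin6_port = htons(80) };
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	/* ::ffff:192.0.2.1 - an IPv4 peer reached through the INET6 API */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
	if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}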
1832 
1833 /* NOTE: a lot of fields are set to zero explicitly by the call to
1834  *       sk_alloc(), so they need not be initialized here.
1835  */
1836 static int tcp_v6_init_sock(struct sock *sk)
1837 {
1838 	struct inet_connection_sock *icsk = inet_csk(sk);
1839 
1840 	tcp_init_sock(sk);
1841 
1842 	icsk->icsk_af_ops = &ipv6_specific;
1843 
1844 #ifdef CONFIG_TCP_MD5SIG
1845 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1846 #endif
1847 
1848 	return 0;
1849 }
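/*
 * Every AF_INET6 TCP socket starts out with the ipv6_specific ops
 * installed above; a later connect() to a v4-mapped peer switches it to
 * ipv6_mapped (see the sketch following that ops table).
 */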
1850 
1851 #ifdef CONFIG_PROC_FS
1852 /* Proc filesystem TCPv6 sock list dumping. */
1853 static void get_openreq6(struct seq_file *seq,
1854 			 const struct request_sock *req, int i)
1855 {
1856 	long ttd = req->rsk_timer.expires - jiffies;
1857 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1858 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1859 
1860 	if (ttd < 0)
1861 		ttd = 0;
1862 
1863 	seq_printf(seq,
1864 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1865 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1866 		   i,
1867 		   src->s6_addr32[0], src->s6_addr32[1],
1868 		   src->s6_addr32[2], src->s6_addr32[3],
1869 		   inet_rsk(req)->ir_num,
1870 		   dest->s6_addr32[0], dest->s6_addr32[1],
1871 		   dest->s6_addr32[2], dest->s6_addr32[3],
1872 		   ntohs(inet_rsk(req)->ir_rmt_port),
1873 		   TCP_SYN_RECV,
1874 		   0, 0, /* could print option size, but that is af dependent. */
1875 		   1,   /* timers active (only the expire timer) */
1876 		   jiffies_to_clock_t(ttd),
1877 		   req->num_timeout,
1878 		   from_kuid_munged(seq_user_ns(seq),
1879 				    sock_i_uid(req->rsk_listener)),
1880 		   0,  /* non standard timer */
1881 		   0, /* open_requests have no inode */
1882 		   0, req);
1883 }
1884 
1885 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1886 {
1887 	const struct in6_addr *dest, *src;
1888 	__u16 destp, srcp;
1889 	int timer_active;
1890 	unsigned long timer_expires;
1891 	const struct inet_sock *inet = inet_sk(sp);
1892 	const struct tcp_sock *tp = tcp_sk(sp);
1893 	const struct inet_connection_sock *icsk = inet_csk(sp);
1894 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1895 	int rx_queue;
1896 	int state;
1897 
1898 	dest  = &sp->sk_v6_daddr;
1899 	src   = &sp->sk_v6_rcv_saddr;
1900 	destp = ntohs(inet->inet_dport);
1901 	srcp  = ntohs(inet->inet_sport);
1902 
1903 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1904 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1905 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1906 		timer_active	= 1;
1907 		timer_expires	= icsk->icsk_timeout;
1908 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1909 		timer_active	= 4;
1910 		timer_expires	= icsk->icsk_timeout;
1911 	} else if (timer_pending(&sp->sk_timer)) {
1912 		timer_active	= 2;
1913 		timer_expires	= sp->sk_timer.expires;
1914 	} else {
1915 		timer_active	= 0;
1916 		timer_expires = jiffies;
1917 	}
1918 
1919 	state = inet_sk_state_load(sp);
1920 	if (state == TCP_LISTEN)
1921 		rx_queue = sp->sk_ack_backlog;
1922 	else
1923 		/* Because we don't lock the socket,
1924 		 * we might find a transient negative value.
1925 		 */
1926 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1927 				      READ_ONCE(tp->copied_seq), 0);
1928 
1929 	seq_printf(seq,
1930 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1931 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1932 		   i,
1933 		   src->s6_addr32[0], src->s6_addr32[1],
1934 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1935 		   dest->s6_addr32[0], dest->s6_addr32[1],
1936 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1937 		   state,
1938 		   READ_ONCE(tp->write_seq) - tp->snd_una,
1939 		   rx_queue,
1940 		   timer_active,
1941 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1942 		   icsk->icsk_retransmits,
1943 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1944 		   icsk->icsk_probes_out,
1945 		   sock_i_ino(sp),
1946 		   refcount_read(&sp->sk_refcnt), sp,
1947 		   jiffies_to_clock_t(icsk->icsk_rto),
1948 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1949 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1950 		   tp->snd_cwnd,
1951 		   state == TCP_LISTEN ?
1952 			fastopenq->max_qlen :
1953 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1954 		   );
1955 }
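/*
 * Illustrative sketch (not kernel code): the format string above prints
 * each address as four %08X groups read straight from s6_addr32[], i.e.
 * 32-bit words in host byte order, so on little-endian machines the bytes
 * appear swapped within each group. A reader of /proc/net/tcp6 on the
 * same machine can invert the transform as below; scan_tcp6_addr() is a
 * hypothetical helper, and s6_addr32 is the Linux/glibc accessor for the
 * address words.
 */
#include <inttypes.h>
#include <stdio.h>
#include <netinet/in.h>

static int scan_tcp6_addr(const char *hex, struct in6_addr *addr)
{
	/* Four 8-digit hex groups map back onto s6_addr32[0..3]. */
	return sscanf(hex, "%8" SCNx32 "%8" SCNx32 "%8" SCNx32 "%8" SCNx32,
		      &addr->s6_addr32[0], &addr->s6_addr32[1],
		      &addr->s6_addr32[2], &addr->s6_addr32[3]) == 4;
}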
1956 
1957 static void get_timewait6_sock(struct seq_file *seq,
1958 			       struct inet_timewait_sock *tw, int i)
1959 {
1960 	long delta = tw->tw_timer.expires - jiffies;
1961 	const struct in6_addr *dest, *src;
1962 	__u16 destp, srcp;
1963 
1964 	dest = &tw->tw_v6_daddr;
1965 	src  = &tw->tw_v6_rcv_saddr;
1966 	destp = ntohs(tw->tw_dport);
1967 	srcp  = ntohs(tw->tw_sport);
1968 
1969 	seq_printf(seq,
1970 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1971 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1972 		   i,
1973 		   src->s6_addr32[0], src->s6_addr32[1],
1974 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1975 		   dest->s6_addr32[0], dest->s6_addr32[1],
1976 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1977 		   tw->tw_substate, 0, 0,
1978 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1979 		   refcount_read(&tw->tw_refcnt), tw);
1980 }
1981 
1982 static int tcp6_seq_show(struct seq_file *seq, void *v)
1983 {
1984 	struct tcp_iter_state *st;
1985 	struct sock *sk = v;
1986 
1987 	if (v == SEQ_START_TOKEN) {
1988 		seq_puts(seq,
1989 			 "  sl  "
1990 			 "local_address                         "
1991 			 "remote_address                        "
1992 			 "st tx_queue rx_queue tr tm->when retrnsmt"
1993 			 "   uid  timeout inode\n");
1994 		goto out;
1995 	}
1996 	st = seq->private;
1997 
1998 	if (sk->sk_state == TCP_TIME_WAIT)
1999 		get_timewait6_sock(seq, v, st->num);
2000 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2001 		get_openreq6(seq, v, st->num);
2002 	else
2003 		get_tcp6_sock(seq, v, st->num);
2004 out:
2005 	return 0;
2006 }
2007 
2008 static const struct seq_operations tcp6_seq_ops = {
2009 	.show		= tcp6_seq_show,
2010 	.start		= tcp_seq_start,
2011 	.next		= tcp_seq_next,
2012 	.stop		= tcp_seq_stop,
2013 };
2014 
2015 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2016 	.family		= AF_INET6,
2017 };
2018 
2019 int __net_init tcp6_proc_init(struct net *net)
2020 {
2021 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2022 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2023 		return -ENOMEM;
2024 	return 0;
2025 }
2026 
2027 void tcp6_proc_exit(struct net *net)
2028 {
2029 	remove_proc_entry("tcp6", net->proc_net);
2030 }
2031 #endif
2032 
2033 struct proto tcpv6_prot = {
2034 	.name			= "TCPv6",
2035 	.owner			= THIS_MODULE,
2036 	.close			= tcp_close,
2037 	.pre_connect		= tcp_v6_pre_connect,
2038 	.connect		= tcp_v6_connect,
2039 	.disconnect		= tcp_disconnect,
2040 	.accept			= inet_csk_accept,
2041 	.ioctl			= tcp_ioctl,
2042 	.init			= tcp_v6_init_sock,
2043 	.destroy		= tcp_v4_destroy_sock,	/* shared with IPv4 */
2044 	.shutdown		= tcp_shutdown,
2045 	.setsockopt		= tcp_setsockopt,
2046 	.getsockopt		= tcp_getsockopt,
2047 	.keepalive		= tcp_set_keepalive,
2048 	.recvmsg		= tcp_recvmsg,
2049 	.sendmsg		= tcp_sendmsg,
2050 	.sendpage		= tcp_sendpage,
2051 	.backlog_rcv		= tcp_v6_do_rcv,
2052 	.release_cb		= tcp_release_cb,
2053 	.hash			= inet6_hash,
2054 	.unhash			= inet_unhash,
2055 	.get_port		= inet_csk_get_port,
2056 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2057 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2058 	.stream_memory_free	= tcp_stream_memory_free,
2059 	.sockets_allocated	= &tcp_sockets_allocated,
2060 	.memory_allocated	= &tcp_memory_allocated,
2061 	.memory_pressure	= &tcp_memory_pressure,
2062 	.orphan_count		= &tcp_orphan_count,
2063 	.sysctl_mem		= sysctl_tcp_mem,
2064 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2065 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2066 	.max_header		= MAX_TCP_HEADER,
2067 	.obj_size		= sizeof(struct tcp6_sock),
2068 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2069 	.twsk_prot		= &tcp6_timewait_sock_ops,
2070 	.rsk_prot		= &tcp6_request_sock_ops,
2071 	.h.hashinfo		= &tcp_hashinfo,
2072 	.no_autobind		= true,
2073 #ifdef CONFIG_COMPAT
2074 	.compat_setsockopt	= compat_tcp_setsockopt,
2075 	.compat_getsockopt	= compat_tcp_getsockopt,
2076 #endif
2077 	.diag_destroy		= tcp_abort,
2078 };
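/*
 * tcpv6_prot is the transport-level operations table; tcpv6_protosw below
 * binds it to SOCK_STREAM/IPPROTO_TCP so that socket(AF_INET6,
 * SOCK_STREAM, 0) resolves to these handlers, while tcpv6_protocol hooks
 * tcp_v6_rcv() into the IPv6 input path.
 */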
2079 
2080 static const struct inet6_protocol tcpv6_protocol = {
2081 	.handler	=	tcp_v6_rcv,
2082 	.err_handler	=	tcp_v6_err,
2083 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2084 };
2085 
2086 static struct inet_protosw tcpv6_protosw = {
2087 	.type		=	SOCK_STREAM,
2088 	.protocol	=	IPPROTO_TCP,
2089 	.prot		=	&tcpv6_prot,
2090 	.ops		=	&inet6_stream_ops,
2091 	.flags		=	INET_PROTOSW_PERMANENT |
2092 				INET_PROTOSW_ICSK,
2093 };
2094 
2095 static int __net_init tcpv6_net_init(struct net *net)
2096 {
2097 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2098 				    SOCK_RAW, IPPROTO_TCP, net);
2099 }
2100 
2101 static void __net_exit tcpv6_net_exit(struct net *net)
2102 {
2103 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2104 }
2105 
2106 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2107 {
2108 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2109 }
2110 
2111 static struct pernet_operations tcpv6_net_ops = {
2112 	.init	    = tcpv6_net_init,
2113 	.exit	    = tcpv6_net_exit,
2114 	.exit_batch = tcpv6_net_exit_batch,
2115 };
2116 
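/*
 * Registration below happens in three steps (protocol handler, protosw,
 * pernet ops); each error label unwinds only the steps that have already
 * succeeded, in reverse order, mirroring tcpv6_exit().
 */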
2117 int __init tcpv6_init(void)
2118 {
2119 	int ret;
2120 
2121 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2122 	if (ret)
2123 		goto out;
2124 
2125 	/* register inet6 protocol */
2126 	ret = inet6_register_protosw(&tcpv6_protosw);
2127 	if (ret)
2128 		goto out_tcpv6_protocol;
2129 
2130 	ret = register_pernet_subsys(&tcpv6_net_ops);
2131 	if (ret)
2132 		goto out_tcpv6_protosw;
2133 out:
2134 	return ret;
2135 
2136 out_tcpv6_protosw:
2137 	inet6_unregister_protosw(&tcpv6_protosw);
2138 out_tcpv6_protocol:
2139 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2140 	goto out;
2141 }
2142 
2143 void tcpv6_exit(void)
2144 {
2145 	unregister_pernet_subsys(&tcpv6_net_ops);
2146 	inet6_unregister_protosw(&tcpv6_protosw);
2147 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2148 }
2149