1 /*
2  *	TCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on:
9  *	linux/net/ipv4/tcp.c
10  *	linux/net/ipv4/tcp_input.c
11  *	linux/net/ipv4/tcp_output.c
12  *
13  *	Fixes:
14  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
15  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
16  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
17  *					a single port at the same time.
18  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
34 #include <linux/in.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
46 
47 #include <net/tcp.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/snmp.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
65 
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
68 
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
71 
72 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 				      struct request_sock *req);
75 
76 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 #else
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 						   const struct in6_addr *addr)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
92 {
93 	struct dst_entry *dst = skb_dst(skb);
94 
95 	if (dst && dst_hold_safe(dst)) {
96 		const struct rt6_info *rt = (const struct rt6_info *)dst;
97 
98 		sk->sk_rx_dst = dst;
99 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
101 	}
102 }
103 
104 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
105 {
106 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
107 				ipv6_hdr(skb)->saddr.s6_addr32,
108 				tcp_hdr(skb)->dest,
109 				tcp_hdr(skb)->source);
110 }
111 
112 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
113 {
114 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
115 				   ipv6_hdr(skb)->saddr.s6_addr32);
116 }
117 
118 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
119 			  int addr_len)
120 {
121 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
122 	struct inet_sock *inet = inet_sk(sk);
123 	struct inet_connection_sock *icsk = inet_csk(sk);
124 	struct ipv6_pinfo *np = inet6_sk(sk);
125 	struct tcp_sock *tp = tcp_sk(sk);
126 	struct in6_addr *saddr = NULL, *final_p, final;
127 	struct ipv6_txoptions *opt;
128 	struct flowi6 fl6;
129 	struct dst_entry *dst;
130 	int addr_type;
131 	int err;
132 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
133 
134 	if (addr_len < SIN6_LEN_RFC2133)
135 		return -EINVAL;
136 
137 	if (usin->sin6_family != AF_INET6)
138 		return -EAFNOSUPPORT;
139 
140 	memset(&fl6, 0, sizeof(fl6));
141 
142 	if (np->sndflow) {
143 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
144 		IP6_ECN_flow_init(fl6.flowlabel);
145 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
146 			struct ip6_flowlabel *flowlabel;
147 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
148 			if (!flowlabel)
149 				return -EINVAL;
150 			fl6_sock_release(flowlabel);
151 		}
152 	}
153 
154 	/*
155 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
156 	 */
157 
158 	if (ipv6_addr_any(&usin->sin6_addr)) {
159 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
160 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
161 					       &usin->sin6_addr);
162 		else
163 			usin->sin6_addr = in6addr_loopback;
164 	}
165 
166 	addr_type = ipv6_addr_type(&usin->sin6_addr);
167 
168 	if (addr_type & IPV6_ADDR_MULTICAST)
169 		return -ENETUNREACH;
170 
171 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
172 		if (addr_len >= sizeof(struct sockaddr_in6) &&
173 		    usin->sin6_scope_id) {
174 			/* If interface is set while binding, indices
175 			 * must coincide.
176 			 */
177 			if (sk->sk_bound_dev_if &&
178 			    sk->sk_bound_dev_if != usin->sin6_scope_id)
179 				return -EINVAL;
180 
181 			sk->sk_bound_dev_if = usin->sin6_scope_id;
182 		}
183 
184 		/* Connecting to a link-local address requires an interface */
185 		if (!sk->sk_bound_dev_if)
186 			return -EINVAL;
187 	}
188 
189 	if (tp->rx_opt.ts_recent_stamp &&
190 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
191 		tp->rx_opt.ts_recent = 0;
192 		tp->rx_opt.ts_recent_stamp = 0;
193 		tp->write_seq = 0;
194 	}
195 
196 	sk->sk_v6_daddr = usin->sin6_addr;
197 	np->flow_label = fl6.flowlabel;
198 
199 	/*
200 	 *	TCP over IPv4
201 	 */
202 
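	/* A v4-mapped destination (::ffff:a.b.c.d) means the peer is
	 * IPv4-only: swap in the ipv6_mapped af_ops and let
	 * tcp_v4_connect() do the work.  The IPv4 address is carried in
	 * the last 32 bits of the mapped address (s6_addr32[3]).
	 */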
203 	if (addr_type & IPV6_ADDR_MAPPED) {
204 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
205 		struct sockaddr_in sin;
206 
207 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
208 
209 		if (__ipv6_only_sock(sk))
210 			return -ENETUNREACH;
211 
212 		sin.sin_family = AF_INET;
213 		sin.sin_port = usin->sin6_port;
214 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
215 
216 		icsk->icsk_af_ops = &ipv6_mapped;
217 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
218 #ifdef CONFIG_TCP_MD5SIG
219 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
220 #endif
221 
222 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
223 
224 		if (err) {
225 			icsk->icsk_ext_hdr_len = exthdrlen;
226 			icsk->icsk_af_ops = &ipv6_specific;
227 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
228 #ifdef CONFIG_TCP_MD5SIG
229 			tp->af_specific = &tcp_sock_ipv6_specific;
230 #endif
231 			goto failure;
232 		}
233 		np->saddr = sk->sk_v6_rcv_saddr;
234 
235 		return err;
236 	}
237 
238 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
239 		saddr = &sk->sk_v6_rcv_saddr;
240 
241 	fl6.flowi6_proto = IPPROTO_TCP;
242 	fl6.daddr = sk->sk_v6_daddr;
243 	fl6.saddr = saddr ? *saddr : np->saddr;
244 	fl6.flowi6_oif = sk->sk_bound_dev_if;
245 	fl6.flowi6_mark = sk->sk_mark;
246 	fl6.fl6_dport = usin->sin6_port;
247 	fl6.fl6_sport = inet->inet_sport;
248 	fl6.flowi6_uid = sk->sk_uid;
249 
250 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
251 	final_p = fl6_update_dst(&fl6, opt, &final);
252 
253 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
254 
255 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
256 	if (IS_ERR(dst)) {
257 		err = PTR_ERR(dst);
258 		goto failure;
259 	}
260 
261 	if (!saddr) {
262 		saddr = &fl6.saddr;
263 		sk->sk_v6_rcv_saddr = *saddr;
264 	}
265 
266 	/* set the source address */
267 	np->saddr = *saddr;
268 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
269 
270 	sk->sk_gso_type = SKB_GSO_TCPV6;
271 	ip6_dst_store(sk, dst, NULL, NULL);
272 
273 	icsk->icsk_ext_hdr_len = 0;
274 	if (opt)
275 		icsk->icsk_ext_hdr_len = opt->opt_flen +
276 					 opt->opt_nflen;
277 
278 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
279 
280 	inet->inet_dport = usin->sin6_port;
281 
282 	tcp_set_state(sk, TCP_SYN_SENT);
283 	err = inet6_hash_connect(tcp_death_row, sk);
284 	if (err)
285 		goto late_failure;
286 
287 	sk_set_txhash(sk);
288 
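	/* Unless the socket is being repaired (TCP_REPAIR), derive the
	 * initial sequence number and timestamp offset from the 4-tuple
	 * with a keyed hash: unpredictable off-path, yet stable for
	 * this flow.
	 */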
289 	if (likely(!tp->repair)) {
290 		if (!tp->write_seq)
291 			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
292 							 sk->sk_v6_daddr.s6_addr32,
293 							 inet->inet_sport,
294 							 inet->inet_dport);
295 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
296 						   np->saddr.s6_addr32,
297 						   sk->sk_v6_daddr.s6_addr32);
298 	}
299 
300 	if (tcp_fastopen_defer_connect(sk, &err))
301 		return err;
302 	if (err)
303 		goto late_failure;
304 
305 	err = tcp_connect(sk);
306 	if (err)
307 		goto late_failure;
308 
309 	return 0;
310 
311 late_failure:
312 	tcp_set_state(sk, TCP_CLOSE);
313 failure:
314 	inet->inet_dport = 0;
315 	sk->sk_route_caps = 0;
316 	return err;
317 }
318 
319 static void tcp_v6_mtu_reduced(struct sock *sk)
320 {
321 	struct dst_entry *dst;
322 
323 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
324 		return;
325 
326 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
327 	if (!dst)
328 		return;
329 
330 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
331 		tcp_sync_mss(sk, dst_mtu(dst));
332 		tcp_simple_retransmit(sk);
333 	}
334 }
335 
336 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
337 		u8 type, u8 code, int offset, __be32 info)
338 {
339 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
340 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
341 	struct net *net = dev_net(skb->dev);
342 	struct request_sock *fastopen;
343 	struct ipv6_pinfo *np;
344 	struct tcp_sock *tp;
345 	__u32 seq, snd_una;
346 	struct sock *sk;
347 	bool fatal;
348 	int err;
349 
350 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
351 					&hdr->daddr, th->dest,
352 					&hdr->saddr, ntohs(th->source),
353 					skb->dev->ifindex, inet6_sdif(skb));
354 
355 	if (!sk) {
356 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
357 				  ICMP6_MIB_INERRORS);
358 		return;
359 	}
360 
361 	if (sk->sk_state == TCP_TIME_WAIT) {
362 		inet_twsk_put(inet_twsk(sk));
363 		return;
364 	}
365 	seq = ntohl(th->seq);
366 	fatal = icmpv6_err_convert(type, code, &err);
367 	if (sk->sk_state == TCP_NEW_SYN_RECV)
368 		return tcp_req_err(sk, seq, fatal);
369 
370 	bh_lock_sock(sk);
371 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
372 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
373 
374 	if (sk->sk_state == TCP_CLOSE)
375 		goto out;
376 
377 	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
378 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
379 		goto out;
380 	}
381 
382 	tp = tcp_sk(sk);
383 	/* XXX (TFO) - tp->snd_una should be ISN (see tcp_create_openreq_child()) */
384 	fastopen = tp->fastopen_rsk;
385 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
386 	if (sk->sk_state != TCP_LISTEN &&
387 	    !between(seq, snd_una, tp->snd_nxt)) {
388 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
389 		goto out;
390 	}
391 
392 	np = inet6_sk(sk);
393 
394 	if (type == NDISC_REDIRECT) {
395 		if (!sock_owned_by_user(sk)) {
396 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
397 
398 			if (dst)
399 				dst->ops->redirect(dst, sk, skb);
400 		}
401 		goto out;
402 	}
403 
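	/* A PMTU update is applied immediately when the socket is not
	 * owned by user context; otherwise it is deferred through the
	 * TCP_MTU_REDUCED_DEFERRED flag, with a reference held so the
	 * socket cannot go away before the deferred work runs.
	 */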
404 	if (type == ICMPV6_PKT_TOOBIG) {
405 		/* We are not interested in TCP_LISTEN and open_requests
406 		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
407 		 * they should go through unfragmented).
408 		 */
409 		if (sk->sk_state == TCP_LISTEN)
410 			goto out;
411 
412 		if (!ip6_sk_accept_pmtu(sk))
413 			goto out;
414 
415 		tp->mtu_info = ntohl(info);
416 		if (!sock_owned_by_user(sk))
417 			tcp_v6_mtu_reduced(sk);
418 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
419 					   &sk->sk_tsq_flags))
420 			sock_hold(sk);
421 		goto out;
422 	}
423 
424 
425 	/* Might be for a request_sock */
426 	switch (sk->sk_state) {
427 	case TCP_SYN_SENT:
428 	case TCP_SYN_RECV:
429 		/* Only in fast or simultaneous open. If a fast open socket
430 		 * is already accepted it is treated as a connected one below.
431 		 */
432 		if (fastopen && !fastopen->sk)
433 			break;
434 
435 		if (!sock_owned_by_user(sk)) {
436 			sk->sk_err = err;
437 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
438 
439 			tcp_done(sk);
440 		} else
441 			sk->sk_err_soft = err;
442 		goto out;
443 	}
444 
445 	if (!sock_owned_by_user(sk) && np->recverr) {
446 		sk->sk_err = err;
447 		sk->sk_error_report(sk);
448 	} else
449 		sk->sk_err_soft = err;
450 
451 out:
452 	bh_unlock_sock(sk);
453 	sock_put(sk);
454 }
455 
456 
457 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
458 			      struct flowi *fl,
459 			      struct request_sock *req,
460 			      struct tcp_fastopen_cookie *foc,
461 			      enum tcp_synack_type synack_type)
462 {
463 	struct inet_request_sock *ireq = inet_rsk(req);
464 	struct ipv6_pinfo *np = inet6_sk(sk);
465 	struct ipv6_txoptions *opt;
466 	struct flowi6 *fl6 = &fl->u.ip6;
467 	struct sk_buff *skb;
468 	int err = -ENOMEM;
469 
470 	/* First, grab a route. */
471 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
472 					       IPPROTO_TCP)) == NULL)
473 		goto done;
474 
475 	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
476 
477 	if (skb) {
478 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
479 				    &ireq->ir_v6_rmt_addr);
480 
481 		fl6->daddr = ireq->ir_v6_rmt_addr;
482 		if (np->repflow && ireq->pktopts)
483 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
484 
485 		rcu_read_lock();
486 		opt = ireq->ipv6_opt;
487 		if (!opt)
488 			opt = rcu_dereference(np->opt);
489 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
490 		rcu_read_unlock();
491 		err = net_xmit_eval(err);
492 	}
493 
494 done:
495 	return err;
496 }
497 
498 
499 static void tcp_v6_reqsk_destructor(struct request_sock *req)
500 {
501 	kfree(inet_rsk(req)->ipv6_opt);
502 	kfree_skb(inet_rsk(req)->pktopts);
503 }
504 
505 #ifdef CONFIG_TCP_MD5SIG
506 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
507 						   const struct in6_addr *addr)
508 {
509 	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
510 }
511 
512 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
513 						const struct sock *addr_sk)
514 {
515 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
516 }
517 
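/* Implements the TCP_MD5SIG/TCP_MD5SIG_EXT setsockopt.  A minimal
 * userspace sketch (illustrative values only):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 16 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */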
518 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
519 				 char __user *optval, int optlen)
520 {
521 	struct tcp_md5sig cmd;
522 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
523 	u8 prefixlen;
524 
525 	if (optlen < sizeof(cmd))
526 		return -EINVAL;
527 
528 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
529 		return -EFAULT;
530 
531 	if (sin6->sin6_family != AF_INET6)
532 		return -EINVAL;
533 
534 	if (optname == TCP_MD5SIG_EXT &&
535 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
536 		prefixlen = cmd.tcpm_prefixlen;
537 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
538 					prefixlen > 32))
539 			return -EINVAL;
540 	} else {
541 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
542 	}
543 
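	/* Without TCP_MD5SIG_FLAG_PREFIX a key matches one exact peer:
	 * a /32 for v4-mapped addresses, a /128 otherwise.  A zero key
	 * length below means "delete the matching key".
	 */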
544 	if (!cmd.tcpm_keylen) {
545 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
546 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
547 					      AF_INET, prefixlen);
548 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
549 				      AF_INET6, prefixlen);
550 	}
551 
552 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
553 		return -EINVAL;
554 
555 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
556 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
557 				      AF_INET, prefixlen, cmd.tcpm_key,
558 				      cmd.tcpm_keylen, GFP_KERNEL);
559 
560 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
561 			      AF_INET6, prefixlen, cmd.tcpm_key,
562 			      cmd.tcpm_keylen, GFP_KERNEL);
563 }
564 
565 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
566 				   const struct in6_addr *daddr,
567 				   const struct in6_addr *saddr,
568 				   const struct tcphdr *th, int nbytes)
569 {
570 	struct tcp6_pseudohdr *bp;
571 	struct scatterlist sg;
572 	struct tcphdr *_th;
573 
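	/* RFC 2385: the digest covers a pseudo-header, the TCP header
	 * with its checksum field zeroed, and the payload.  Build the
	 * first two in scratch space so the on-wire skb stays untouched.
	 */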
574 	bp = hp->scratch;
575 	/* 1. TCP pseudo-header (RFC2460) */
576 	bp->saddr = *saddr;
577 	bp->daddr = *daddr;
578 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
579 	bp->len = cpu_to_be32(nbytes);
580 
581 	_th = (struct tcphdr *)(bp + 1);
582 	memcpy(_th, th, sizeof(*th));
583 	_th->check = 0;
584 
585 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
586 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
587 				sizeof(*bp) + sizeof(*th));
588 	return crypto_ahash_update(hp->md5_req);
589 }
590 
591 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
592 			       const struct in6_addr *daddr, struct in6_addr *saddr,
593 			       const struct tcphdr *th)
594 {
595 	struct tcp_md5sig_pool *hp;
596 	struct ahash_request *req;
597 
598 	hp = tcp_get_md5sig_pool();
599 	if (!hp)
600 		goto clear_hash_noput;
601 	req = hp->md5_req;
602 
603 	if (crypto_ahash_init(req))
604 		goto clear_hash;
605 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
606 		goto clear_hash;
607 	if (tcp_md5_hash_key(hp, key))
608 		goto clear_hash;
609 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
610 	if (crypto_ahash_final(req))
611 		goto clear_hash;
612 
613 	tcp_put_md5sig_pool();
614 	return 0;
615 
616 clear_hash:
617 	tcp_put_md5sig_pool();
618 clear_hash_noput:
619 	memset(md5_hash, 0, 16);
620 	return 1;
621 }
622 
623 static int tcp_v6_md5_hash_skb(char *md5_hash,
624 			       const struct tcp_md5sig_key *key,
625 			       const struct sock *sk,
626 			       const struct sk_buff *skb)
627 {
628 	const struct in6_addr *saddr, *daddr;
629 	struct tcp_md5sig_pool *hp;
630 	struct ahash_request *req;
631 	const struct tcphdr *th = tcp_hdr(skb);
632 
633 	if (sk) { /* valid for establish/request sockets */
634 		saddr = &sk->sk_v6_rcv_saddr;
635 		daddr = &sk->sk_v6_daddr;
636 	} else {
637 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
638 		saddr = &ip6h->saddr;
639 		daddr = &ip6h->daddr;
640 	}
641 
642 	hp = tcp_get_md5sig_pool();
643 	if (!hp)
644 		goto clear_hash_noput;
645 	req = hp->md5_req;
646 
647 	if (crypto_ahash_init(req))
648 		goto clear_hash;
649 
650 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
651 		goto clear_hash;
652 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
653 		goto clear_hash;
654 	if (tcp_md5_hash_key(hp, key))
655 		goto clear_hash;
656 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
657 	if (crypto_ahash_final(req))
658 		goto clear_hash;
659 
660 	tcp_put_md5sig_pool();
661 	return 0;
662 
663 clear_hash:
664 	tcp_put_md5sig_pool();
665 clear_hash_noput:
666 	memset(md5_hash, 0, 16);
667 	return 1;
668 }
669 
670 #endif
671 
672 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
673 				    const struct sk_buff *skb)
674 {
675 #ifdef CONFIG_TCP_MD5SIG
676 	const __u8 *hash_location = NULL;
677 	struct tcp_md5sig_key *hash_expected;
678 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
679 	const struct tcphdr *th = tcp_hdr(skb);
680 	int genhash;
681 	u8 newhash[16];
682 
683 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
684 	hash_location = tcp_parse_md5sig_option(th);
685 
686 	/* We've parsed the options - do we have a hash? */
687 	if (!hash_expected && !hash_location)
688 		return false;
689 
690 	if (hash_expected && !hash_location) {
691 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
692 		return true;
693 	}
694 
695 	if (!hash_expected && hash_location) {
696 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
697 		return true;
698 	}
699 
700 	/* check the signature */
701 	genhash = tcp_v6_md5_hash_skb(newhash,
702 				      hash_expected,
703 				      NULL, skb);
704 
705 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
706 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
707 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
708 				     genhash ? "failed" : "mismatch",
709 				     &ip6h->saddr, ntohs(th->source),
710 				     &ip6h->daddr, ntohs(th->dest));
711 		return true;
712 	}
713 #endif
714 	return false;
715 }
716 
717 static void tcp_v6_init_req(struct request_sock *req,
718 			    const struct sock *sk_listener,
719 			    struct sk_buff *skb)
720 {
721 	struct inet_request_sock *ireq = inet_rsk(req);
722 	const struct ipv6_pinfo *np = inet6_sk(sk_listener);
723 
724 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
725 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
726 
727 	/* So that link locals have meaning */
728 	if (!sk_listener->sk_bound_dev_if &&
729 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
730 		ireq->ir_iif = tcp_v6_iif(skb);
731 
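	/* Keep the SYN around if the listener asked for any IPv6 rx
	 * options; it is cloned into the child's pktoptions once the
	 * connection is accepted (see tcp_v6_syn_recv_sock()).
	 */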
732 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
733 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
734 	     np->rxopt.bits.rxinfo ||
735 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
736 	     np->rxopt.bits.rxohlim || np->repflow)) {
737 		refcount_inc(&skb->users);
738 		ireq->pktopts = skb;
739 	}
740 }
741 
742 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
743 					  struct flowi *fl,
744 					  const struct request_sock *req)
745 {
746 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
747 }
748 
749 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
750 	.family		=	AF_INET6,
751 	.obj_size	=	sizeof(struct tcp6_request_sock),
752 	.rtx_syn_ack	=	tcp_rtx_synack,
753 	.send_ack	=	tcp_v6_reqsk_send_ack,
754 	.destructor	=	tcp_v6_reqsk_destructor,
755 	.send_reset	=	tcp_v6_send_reset,
756 	.syn_ack_timeout =	tcp_syn_ack_timeout,
757 };
758 
759 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
760 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
761 				sizeof(struct ipv6hdr),
762 #ifdef CONFIG_TCP_MD5SIG
763 	.req_md5_lookup	=	tcp_v6_md5_lookup,
764 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
765 #endif
766 	.init_req	=	tcp_v6_init_req,
767 #ifdef CONFIG_SYN_COOKIES
768 	.cookie_init_seq =	cookie_v6_init_sequence,
769 #endif
770 	.route_req	=	tcp_v6_route_req,
771 	.init_seq	=	tcp_v6_init_seq,
772 	.init_ts_off	=	tcp_v6_init_ts_off,
773 	.send_synack	=	tcp_v6_send_synack,
774 };
775 
776 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
777 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
778 				 int oif, struct tcp_md5sig_key *key, int rst,
779 				 u8 tclass, __be32 label)
780 {
781 	const struct tcphdr *th = tcp_hdr(skb);
782 	struct tcphdr *t1;
783 	struct sk_buff *buff;
784 	struct flowi6 fl6;
785 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
786 	struct sock *ctl_sk = net->ipv6.tcp_sk;
787 	unsigned int tot_len = sizeof(struct tcphdr);
788 	struct dst_entry *dst;
789 	__be32 *topt;
790 
791 	if (tsecr)
792 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
793 #ifdef CONFIG_TCP_MD5SIG
794 	if (key)
795 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
796 #endif
797 
798 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
799 			 GFP_ATOMIC);
800 	if (!buff)
801 		return;
802 
803 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
804 
805 	t1 = skb_push(buff, tot_len);
806 	skb_reset_transport_header(buff);
807 
808 	/* Swap the send and the receive. */
809 	memset(t1, 0, sizeof(*t1));
810 	t1->dest = th->source;
811 	t1->source = th->dest;
812 	t1->doff = tot_len / 4;
813 	t1->seq = htonl(seq);
814 	t1->ack_seq = htonl(ack);
815 	t1->ack = !rst || !th->ack;
816 	t1->rst = rst;
817 	t1->window = htons(win);
818 
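	/* Option words follow the fixed header: an aligned timestamp
	 * option when tsecr is set and, when a key is given, an aligned
	 * MD5 option whose digest covers the reply just built.
	 */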
819 	topt = (__be32 *)(t1 + 1);
820 
821 	if (tsecr) {
822 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
823 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
824 		*topt++ = htonl(tsval);
825 		*topt++ = htonl(tsecr);
826 	}
827 
828 #ifdef CONFIG_TCP_MD5SIG
829 	if (key) {
830 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
831 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
832 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
833 				    &ipv6_hdr(skb)->saddr,
834 				    &ipv6_hdr(skb)->daddr, t1);
835 	}
836 #endif
837 
838 	memset(&fl6, 0, sizeof(fl6));
839 	fl6.daddr = ipv6_hdr(skb)->saddr;
840 	fl6.saddr = ipv6_hdr(skb)->daddr;
841 	fl6.flowlabel = label;
842 
843 	buff->ip_summed = CHECKSUM_PARTIAL;
844 	buff->csum = 0;
845 
846 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
847 
848 	fl6.flowi6_proto = IPPROTO_TCP;
849 	if (rt6_need_strict(&fl6.daddr) && !oif)
850 		fl6.flowi6_oif = tcp_v6_iif(skb);
851 	else {
852 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
853 			oif = skb->skb_iif;
854 
855 		fl6.flowi6_oif = oif;
856 	}
857 
858 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
859 	fl6.fl6_dport = t1->dest;
860 	fl6.fl6_sport = t1->source;
861 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
862 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
863 
864 	/* Pass a socket to ip6_dst_lookup even when it is for a RST.
865 	 * The underlying function will use this to retrieve the network
866 	 * namespace.
867 	 */
868 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
869 	if (!IS_ERR(dst)) {
870 		skb_dst_set(buff, dst);
871 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
872 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
873 		if (rst)
874 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
875 		return;
876 	}
877 
878 	kfree_skb(buff);
879 }
880 
881 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
882 {
883 	const struct tcphdr *th = tcp_hdr(skb);
884 	u32 seq = 0, ack_seq = 0;
885 	struct tcp_md5sig_key *key = NULL;
886 #ifdef CONFIG_TCP_MD5SIG
887 	const __u8 *hash_location = NULL;
888 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
889 	unsigned char newhash[16];
890 	int genhash;
891 	struct sock *sk1 = NULL;
892 #endif
893 	int oif;
894 
895 	if (th->rst)
896 		return;
897 
898 	/* If sk is not NULL, it means we did a successful lookup and the
899 	 * incoming route had to be correct. prequeue might have dropped our dst.
900 	 */
901 	if (!sk && !ipv6_unicast_destination(skb))
902 		return;
903 
904 #ifdef CONFIG_TCP_MD5SIG
905 	rcu_read_lock();
906 	hash_location = tcp_parse_md5sig_option(th);
907 	if (sk && sk_fullsock(sk)) {
908 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
909 	} else if (hash_location) {
910 		/*
911 		 * The active side is lost. Try to find the listening socket
912 		 * through the source port, then find the md5 key through it.
913 		 * We do not loosen security here:
914 		 * the incoming packet is checked with the md5 hash of the found
915 		 * key, and no RST is generated if the hash doesn't match.
916 		 */
917 		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
918 					   &tcp_hashinfo, NULL, 0,
919 					   &ipv6h->saddr,
920 					   th->source, &ipv6h->daddr,
921 					   ntohs(th->source),
922 					   tcp_v6_iif_l3_slave(skb),
923 					   tcp_v6_sdif(skb));
924 		if (!sk1)
925 			goto out;
926 
927 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
928 		if (!key)
929 			goto out;
930 
931 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
932 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
933 			goto out;
934 	}
935 #endif
936 
937 	if (th->ack)
938 		seq = ntohl(th->ack_seq);
939 	else
940 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
941 			  (th->doff << 2);
942 
943 	oif = sk ? sk->sk_bound_dev_if : 0;
944 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
945 
946 #ifdef CONFIG_TCP_MD5SIG
947 out:
948 	rcu_read_unlock();
949 #endif
950 }
951 
952 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
953 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
954 			    struct tcp_md5sig_key *key, u8 tclass,
955 			    __be32 label)
956 {
957 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
958 			     tclass, label);
959 }
960 
961 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
962 {
963 	struct inet_timewait_sock *tw = inet_twsk(sk);
964 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
965 
966 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
967 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
968 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
969 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
970 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
971 
972 	inet_twsk_put(tw);
973 }
974 
975 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
976 				  struct request_sock *req)
977 {
978 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
979 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
980 	 */
981 	/* RFC 7323 2.3
982 	 * The window field (SEG.WND) of every outgoing segment, with the
983 	 * exception of <SYN> segments, MUST be right-shifted by
984 	 * Rcv.Wind.Shift bits:
985 	 */
986 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
987 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
988 			tcp_rsk(req)->rcv_nxt,
989 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
990 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
991 			req->ts_recent, sk->sk_bound_dev_if,
992 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
993 			0, 0);
994 }
995 
996 
997 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
998 {
999 #ifdef CONFIG_SYN_COOKIES
1000 	const struct tcphdr *th = tcp_hdr(skb);
1001 
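	/* Syncookies matter only for non-SYN segments: a valid cookie
	 * comes back in the ACK completing the handshake, and
	 * cookie_v6_check() reconstructs the request state from it.
	 */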
1002 	if (!th->syn)
1003 		sk = cookie_v6_check(sk, skb);
1004 #endif
1005 	return sk;
1006 }
1007 
1008 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1009 {
1010 	if (skb->protocol == htons(ETH_P_IP))
1011 		return tcp_v4_conn_request(sk, skb);
1012 
1013 	if (!ipv6_unicast_destination(skb))
1014 		goto drop;
1015 
1016 	return tcp_conn_request(&tcp6_request_sock_ops,
1017 				&tcp_request_sock_ipv6_ops, sk, skb);
1018 
1019 drop:
1020 	tcp_listendrop(sk);
1021 	return 0; /* don't send reset */
1022 }
1023 
1024 static void tcp_v6_restore_cb(struct sk_buff *skb)
1025 {
1026 	/* We need to move header back to the beginning if xfrm6_policy_check()
1027 	 * and tcp_v6_fill_cb() are going to be called again.
1028 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1029 	 */
1030 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1031 		sizeof(struct inet6_skb_parm));
1032 }
1033 
1034 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1035 					 struct request_sock *req,
1036 					 struct dst_entry *dst,
1037 					 struct request_sock *req_unhash,
1038 					 bool *own_req)
1039 {
1040 	struct inet_request_sock *ireq;
1041 	struct ipv6_pinfo *newnp;
1042 	const struct ipv6_pinfo *np = inet6_sk(sk);
1043 	struct ipv6_txoptions *opt;
1044 	struct tcp6_sock *newtcp6sk;
1045 	struct inet_sock *newinet;
1046 	struct tcp_sock *newtp;
1047 	struct sock *newsk;
1048 #ifdef CONFIG_TCP_MD5SIG
1049 	struct tcp_md5sig_key *key;
1050 #endif
1051 	struct flowi6 fl6;
1052 
1053 	if (skb->protocol == htons(ETH_P_IP)) {
1054 		/*
1055 		 *	v6 mapped
1056 		 */
1057 
1058 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1059 					     req_unhash, own_req);
1060 
1061 		if (!newsk)
1062 			return NULL;
1063 
1064 		newtcp6sk = (struct tcp6_sock *)newsk;
1065 		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1066 
1067 		newinet = inet_sk(newsk);
1068 		newnp = inet6_sk(newsk);
1069 		newtp = tcp_sk(newsk);
1070 
1071 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1072 
1073 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1074 
1075 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1076 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1077 #ifdef CONFIG_TCP_MD5SIG
1078 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1079 #endif
1080 
1081 		newnp->ipv6_mc_list = NULL;
1082 		newnp->ipv6_ac_list = NULL;
1083 		newnp->ipv6_fl_list = NULL;
1084 		newnp->pktoptions  = NULL;
1085 		newnp->opt	   = NULL;
1086 		newnp->mcast_oif   = inet_iif(skb);
1087 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1088 		newnp->rcv_flowinfo = 0;
1089 		if (np->repflow)
1090 			newnp->flow_label = 0;
1091 
1092 		/*
1093 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1094 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1095 		 * that function for the gory details. -acme
1096 		 */
1097 
1098 		/* It is a tricky place. Until this moment the IPv4 tcp code
1099 		   worked with the IPv6 icsk.icsk_af_ops.
1100 		   Sync it now.
1101 		 */
1102 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1103 
1104 		return newsk;
1105 	}
1106 
1107 	ireq = inet_rsk(req);
1108 
1109 	if (sk_acceptq_is_full(sk))
1110 		goto out_overflow;
1111 
1112 	if (!dst) {
1113 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1114 		if (!dst)
1115 			goto out;
1116 	}
1117 
1118 	newsk = tcp_create_openreq_child(sk, req, skb);
1119 	if (!newsk)
1120 		goto out_nonewsk;
1121 
1122 	/*
1123 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1124 	 * count here, tcp_create_openreq_child now does this for us, see the
1125 	 * comment in that function for the gory details. -acme
1126 	 */
1127 
1128 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1129 	ip6_dst_store(newsk, dst, NULL, NULL);
1130 	inet6_sk_rx_dst_set(newsk, skb);
1131 
1132 	newtcp6sk = (struct tcp6_sock *)newsk;
1133 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1134 
1135 	newtp = tcp_sk(newsk);
1136 	newinet = inet_sk(newsk);
1137 	newnp = inet6_sk(newsk);
1138 
1139 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1140 
1141 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1142 	newnp->saddr = ireq->ir_v6_loc_addr;
1143 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1144 	newsk->sk_bound_dev_if = ireq->ir_iif;
1145 
1146 	/* Now IPv6 options...
1147 
1148 	   First: no IPv4 options.
1149 	 */
1150 	newinet->inet_opt = NULL;
1151 	newnp->ipv6_mc_list = NULL;
1152 	newnp->ipv6_ac_list = NULL;
1153 	newnp->ipv6_fl_list = NULL;
1154 
1155 	/* Clone RX bits */
1156 	newnp->rxopt.all = np->rxopt.all;
1157 
1158 	newnp->pktoptions = NULL;
1159 	newnp->opt	  = NULL;
1160 	newnp->mcast_oif  = tcp_v6_iif(skb);
1161 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1162 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1163 	if (np->repflow)
1164 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1165 
1166 	/* Clone native IPv6 options from listening socket (if any)
1167 
1168 	   Yes, keeping a reference count would be much more clever,
1169 	   but we do one more thing there: reattach optmem
1170 	   to newsk.
1171 	 */
1172 	opt = ireq->ipv6_opt;
1173 	if (!opt)
1174 		opt = rcu_dereference(np->opt);
1175 	if (opt) {
1176 		opt = ipv6_dup_options(newsk, opt);
1177 		RCU_INIT_POINTER(newnp->opt, opt);
1178 	}
1179 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1180 	if (opt)
1181 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1182 						    opt->opt_flen;
1183 
1184 	tcp_ca_openreq_child(newsk, dst);
1185 
1186 	tcp_sync_mss(newsk, dst_mtu(dst));
1187 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1188 
1189 	tcp_initialize_rcv_mss(newsk);
1190 
1191 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1192 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1193 
1194 #ifdef CONFIG_TCP_MD5SIG
1195 	/* Copy over the MD5 key from the original socket */
1196 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1197 	if (key) {
1198 		/* We're using one, so create a matching key
1199 		 * on the newsk structure. If we fail to get
1200 		 * memory, then we end up not copying the key
1201 		 * across. Shucks.
1202 		 */
1203 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1204 			       AF_INET6, 128, key->key, key->keylen,
1205 			       sk_gfp_mask(sk, GFP_ATOMIC));
1206 	}
1207 #endif
1208 
1209 	if (__inet_inherit_port(sk, newsk) < 0) {
1210 		inet_csk_prepare_forced_close(newsk);
1211 		tcp_done(newsk);
1212 		goto out;
1213 	}
1214 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1215 	if (*own_req) {
1216 		tcp_move_syn(newtp, req);
1217 
1218 		/* Clone pktoptions received with SYN, if we own the req */
1219 		if (ireq->pktopts) {
1220 			newnp->pktoptions = skb_clone(ireq->pktopts,
1221 						      sk_gfp_mask(sk, GFP_ATOMIC));
1222 			consume_skb(ireq->pktopts);
1223 			ireq->pktopts = NULL;
1224 			if (newnp->pktoptions) {
1225 				tcp_v6_restore_cb(newnp->pktoptions);
1226 				skb_set_owner_r(newnp->pktoptions, newsk);
1227 			}
1228 		}
1229 	}
1230 
1231 	return newsk;
1232 
1233 out_overflow:
1234 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1235 out_nonewsk:
1236 	dst_release(dst);
1237 out:
1238 	tcp_listendrop(sk);
1239 	return NULL;
1240 }
1241 
1242 /* The socket must have its spinlock held when we get
1243  * here, unless it is a TCP_LISTEN socket.
1244  *
1245  * We have a potential double-lock case here, so even when
1246  * doing backlog processing we use the BH locking scheme.
1247  * This is because we cannot sleep with the original spinlock
1248  * held.
1249  */
1250 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1251 {
1252 	struct ipv6_pinfo *np = inet6_sk(sk);
1253 	struct tcp_sock *tp;
1254 	struct sk_buff *opt_skb = NULL;
1255 
1256 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1257 	   goes to the IPv4 receive handler and is backlogged.
1258 	   From the backlog it always comes here. Kerboom...
1259 	   Fortunately, tcp_rcv_established and rcv_established
1260 	   handle them correctly, but it is not the case with
1261 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1262 	 */
1263 
1264 	if (skb->protocol == htons(ETH_P_IP))
1265 		return tcp_v4_do_rcv(sk, skb);
1266 
1267 	/*
1268 	 *	socket locking is here for SMP purposes as backlog rcv
1269 	 *	is currently called with bh processing disabled.
1270 	 */
1271 
1272 	/* Do Stevens' IPV6_PKTOPTIONS.
1273 
1274 	   Yes, guys, it is the only place in our code where we
1275 	   may make it without affecting IPv4.
1276 	   The rest of the code is protocol independent,
1277 	   and I do not like the idea of uglifying IPv4.
1278 
1279 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1280 	   does not look very well thought out. For now we latch
1281 	   the options received in the last packet enqueued
1282 	   by tcp. Feel free to propose a better solution.
1283 					       --ANK (980728)
1284 	 */
1285 	if (np->rxopt.all)
1286 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1287 
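	/* Fast path: established sockets cache their input route.  Drop
	 * the cached dst if the incoming interface or routing cookie no
	 * longer matches before tcp_rcv_established() sees the segment.
	 */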
1288 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1289 		struct dst_entry *dst = sk->sk_rx_dst;
1290 
1291 		sock_rps_save_rxhash(sk, skb);
1292 		sk_mark_napi_id(sk, skb);
1293 		if (dst) {
1294 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1295 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1296 				dst_release(dst);
1297 				sk->sk_rx_dst = NULL;
1298 			}
1299 		}
1300 
1301 		tcp_rcv_established(sk, skb, tcp_hdr(skb));
1302 		if (opt_skb)
1303 			goto ipv6_pktoptions;
1304 		return 0;
1305 	}
1306 
1307 	if (tcp_checksum_complete(skb))
1308 		goto csum_err;
1309 
1310 	if (sk->sk_state == TCP_LISTEN) {
1311 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1312 
1313 		if (!nsk)
1314 			goto discard;
1315 
1316 		if (nsk != sk) {
1317 			if (tcp_child_process(sk, nsk, skb))
1318 				goto reset;
1319 			if (opt_skb)
1320 				__kfree_skb(opt_skb);
1321 			return 0;
1322 		}
1323 	} else
1324 		sock_rps_save_rxhash(sk, skb);
1325 
1326 	if (tcp_rcv_state_process(sk, skb))
1327 		goto reset;
1328 	if (opt_skb)
1329 		goto ipv6_pktoptions;
1330 	return 0;
1331 
1332 reset:
1333 	tcp_v6_send_reset(sk, skb);
1334 discard:
1335 	if (opt_skb)
1336 		__kfree_skb(opt_skb);
1337 	kfree_skb(skb);
1338 	return 0;
1339 csum_err:
1340 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1341 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1342 	goto discard;
1343 
1344 
1345 ipv6_pktoptions:
1346 	/* You may ask, what is this?
1347 
1348 	   1. The skb was enqueued by tcp.
1349 	   2. The skb was added to the tail of the read queue, not out of order.
1350 	   3. The socket is not in a passive state.
1351 	   4. Finally, it really contains options the user wants to receive.
1352 	 */
1353 	tp = tcp_sk(sk);
1354 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1355 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1356 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1357 			np->mcast_oif = tcp_v6_iif(opt_skb);
1358 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1359 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1360 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1361 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1362 		if (np->repflow)
1363 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1364 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1365 			skb_set_owner_r(opt_skb, sk);
1366 			tcp_v6_restore_cb(opt_skb);
1367 			opt_skb = xchg(&np->pktoptions, opt_skb);
1368 		} else {
1369 			__kfree_skb(opt_skb);
1370 			opt_skb = xchg(&np->pktoptions, NULL);
1371 		}
1372 	}
1373 
1374 	kfree_skb(opt_skb);
1375 	return 0;
1376 }
1377 
1378 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1379 			   const struct tcphdr *th)
1380 {
1381 	/* This is tricky: we move IP6CB at its correct location into
1382 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1383 	 * _decode_session6() uses IP6CB().
1384 	 * barrier() makes sure compiler won't play aliasing games.
1385 	 */
1386 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1387 		sizeof(struct inet6_skb_parm));
1388 	barrier();
1389 
1390 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1391 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1392 				    skb->len - th->doff*4);
1393 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1394 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1395 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1396 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1397 	TCP_SKB_CB(skb)->sacked = 0;
1398 	TCP_SKB_CB(skb)->has_rxtstamp =
1399 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1400 }
1401 
1402 static int tcp_v6_rcv(struct sk_buff *skb)
1403 {
1404 	int sdif = inet6_sdif(skb);
1405 	const struct tcphdr *th;
1406 	const struct ipv6hdr *hdr;
1407 	bool refcounted;
1408 	struct sock *sk;
1409 	int ret;
1410 	struct net *net = dev_net(skb->dev);
1411 
1412 	if (skb->pkt_type != PACKET_HOST)
1413 		goto discard_it;
1414 
1415 	/*
1416 	 *	Count it even if it's bad.
1417 	 */
1418 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1419 
1420 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1421 		goto discard_it;
1422 
1423 	th = (const struct tcphdr *)skb->data;
1424 
1425 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1426 		goto bad_packet;
1427 	if (!pskb_may_pull(skb, th->doff*4))
1428 		goto discard_it;
1429 
1430 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1431 		goto csum_error;
1432 
1433 	th = (const struct tcphdr *)skb->data;
1434 	hdr = ipv6_hdr(skb);
1435 
1436 lookup:
1437 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1438 				th->source, th->dest, inet6_iif(skb), sdif,
1439 				&refcounted);
1440 	if (!sk)
1441 		goto no_tcp_socket;
1442 
1443 process:
1444 	if (sk->sk_state == TCP_TIME_WAIT)
1445 		goto do_time_wait;
1446 
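	/* TCP_NEW_SYN_RECV is a request sock, not a full socket: operate
	 * on the listener, re-check MD5 and checksum, then let
	 * tcp_check_req() either create the child or drop the request.
	 * If the listener stopped listening meanwhile, redo the lookup.
	 */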
1447 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1448 		struct request_sock *req = inet_reqsk(sk);
1449 		struct sock *nsk;
1450 
1451 		sk = req->rsk_listener;
1452 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
1453 			sk_drops_add(sk, skb);
1454 			reqsk_put(req);
1455 			goto discard_it;
1456 		}
1457 		if (tcp_checksum_complete(skb)) {
1458 			reqsk_put(req);
1459 			goto csum_error;
1460 		}
1461 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1462 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1463 			goto lookup;
1464 		}
1465 		sock_hold(sk);
1466 		refcounted = true;
1467 		nsk = NULL;
1468 		if (!tcp_filter(sk, skb)) {
1469 			th = (const struct tcphdr *)skb->data;
1470 			hdr = ipv6_hdr(skb);
1471 			tcp_v6_fill_cb(skb, hdr, th);
1472 			nsk = tcp_check_req(sk, skb, req, false);
1473 		}
1474 		if (!nsk) {
1475 			reqsk_put(req);
1476 			goto discard_and_relse;
1477 		}
1478 		if (nsk == sk) {
1479 			reqsk_put(req);
1480 			tcp_v6_restore_cb(skb);
1481 		} else if (tcp_child_process(sk, nsk, skb)) {
1482 			tcp_v6_send_reset(nsk, skb);
1483 			goto discard_and_relse;
1484 		} else {
1485 			sock_put(sk);
1486 			return 0;
1487 		}
1488 	}
1489 	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1490 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1491 		goto discard_and_relse;
1492 	}
1493 
1494 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1495 		goto discard_and_relse;
1496 
1497 	if (tcp_v6_inbound_md5_hash(sk, skb))
1498 		goto discard_and_relse;
1499 
1500 	if (tcp_filter(sk, skb))
1501 		goto discard_and_relse;
1502 	th = (const struct tcphdr *)skb->data;
1503 	hdr = ipv6_hdr(skb);
1504 	tcp_v6_fill_cb(skb, hdr, th);
1505 
1506 	skb->dev = NULL;
1507 
1508 	if (sk->sk_state == TCP_LISTEN) {
1509 		ret = tcp_v6_do_rcv(sk, skb);
1510 		goto put_and_return;
1511 	}
1512 
1513 	sk_incoming_cpu_update(sk);
1514 
1515 	bh_lock_sock_nested(sk);
1516 	tcp_segs_in(tcp_sk(sk), skb);
1517 	ret = 0;
1518 	if (!sock_owned_by_user(sk)) {
1519 		ret = tcp_v6_do_rcv(sk, skb);
1520 	} else if (tcp_add_backlog(sk, skb)) {
1521 		goto discard_and_relse;
1522 	}
1523 	bh_unlock_sock(sk);
1524 
1525 put_and_return:
1526 	if (refcounted)
1527 		sock_put(sk);
1528 	return ret ? -1 : 0;
1529 
1530 no_tcp_socket:
1531 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1532 		goto discard_it;
1533 
1534 	tcp_v6_fill_cb(skb, hdr, th);
1535 
1536 	if (tcp_checksum_complete(skb)) {
1537 csum_error:
1538 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1539 bad_packet:
1540 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1541 	} else {
1542 		tcp_v6_send_reset(NULL, skb);
1543 	}
1544 
1545 discard_it:
1546 	kfree_skb(skb);
1547 	return 0;
1548 
1549 discard_and_relse:
1550 	sk_drops_add(sk, skb);
1551 	if (refcounted)
1552 		sock_put(sk);
1553 	goto discard_it;
1554 
1555 do_time_wait:
1556 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1557 		inet_twsk_put(inet_twsk(sk));
1558 		goto discard_it;
1559 	}
1560 
1561 	tcp_v6_fill_cb(skb, hdr, th);
1562 
1563 	if (tcp_checksum_complete(skb)) {
1564 		inet_twsk_put(inet_twsk(sk));
1565 		goto csum_error;
1566 	}
1567 
1568 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1569 	case TCP_TW_SYN:
1570 	{
1571 		struct sock *sk2;
1572 
1573 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1574 					    skb, __tcp_hdrlen(th),
1575 					    &ipv6_hdr(skb)->saddr, th->source,
1576 					    &ipv6_hdr(skb)->daddr,
1577 					    ntohs(th->dest),
1578 					    tcp_v6_iif_l3_slave(skb),
1579 					    sdif);
1580 		if (sk2) {
1581 			struct inet_timewait_sock *tw = inet_twsk(sk);
1582 			inet_twsk_deschedule_put(tw);
1583 			sk = sk2;
1584 			tcp_v6_restore_cb(skb);
1585 			refcounted = false;
1586 			goto process;
1587 		}
1588 		/* Fall through to ACK */
1589 	}
1590 	case TCP_TW_ACK:
1591 		tcp_v6_timewait_ack(sk, skb);
1592 		break;
1593 	case TCP_TW_RST:
1594 		tcp_v6_send_reset(sk, skb);
1595 		inet_twsk_deschedule_put(inet_twsk(sk));
1596 		goto discard_it;
1597 	case TCP_TW_SUCCESS:
1598 		;
1599 	}
1600 	goto discard_it;
1601 }
1602 
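/* Early demux runs before routing: if an established socket matches the
 * 4-tuple, attach it (and its cached rx dst, when still valid) to the
 * skb so the regular input path can skip the full socket lookup.
 */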
1603 static void tcp_v6_early_demux(struct sk_buff *skb)
1604 {
1605 	const struct ipv6hdr *hdr;
1606 	const struct tcphdr *th;
1607 	struct sock *sk;
1608 
1609 	if (skb->pkt_type != PACKET_HOST)
1610 		return;
1611 
1612 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1613 		return;
1614 
1615 	hdr = ipv6_hdr(skb);
1616 	th = tcp_hdr(skb);
1617 
1618 	if (th->doff < sizeof(struct tcphdr) / 4)
1619 		return;
1620 
1621 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1622 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1623 					&hdr->saddr, th->source,
1624 					&hdr->daddr, ntohs(th->dest),
1625 					inet6_iif(skb), inet6_sdif(skb));
1626 	if (sk) {
1627 		skb->sk = sk;
1628 		skb->destructor = sock_edemux;
1629 		if (sk_fullsock(sk)) {
1630 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1631 
1632 			if (dst)
1633 				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1634 			if (dst &&
1635 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1636 				skb_dst_set_noref(skb, dst);
1637 		}
1638 	}
1639 }
1640 
1641 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1642 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1643 	.twsk_unique	= tcp_twsk_unique,
1644 	.twsk_destructor = tcp_twsk_destructor,
1645 };
1646 
1647 static const struct inet_connection_sock_af_ops ipv6_specific = {
1648 	.queue_xmit	   = inet6_csk_xmit,
1649 	.send_check	   = tcp_v6_send_check,
1650 	.rebuild_header	   = inet6_sk_rebuild_header,
1651 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1652 	.conn_request	   = tcp_v6_conn_request,
1653 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1654 	.net_header_len	   = sizeof(struct ipv6hdr),
1655 	.net_frag_header_len = sizeof(struct frag_hdr),
1656 	.setsockopt	   = ipv6_setsockopt,
1657 	.getsockopt	   = ipv6_getsockopt,
1658 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1659 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1660 #ifdef CONFIG_COMPAT
1661 	.compat_setsockopt = compat_ipv6_setsockopt,
1662 	.compat_getsockopt = compat_ipv6_getsockopt,
1663 #endif
1664 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1665 };
1666 
1667 #ifdef CONFIG_TCP_MD5SIG
1668 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1669 	.md5_lookup	=	tcp_v6_md5_lookup,
1670 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1671 	.md5_parse	=	tcp_v6_parse_md5_keys,
1672 };
1673 #endif
1674 
1675 /*
1676  *	TCP over IPv4 via INET6 API
1677  */
1678 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1679 	.queue_xmit	   = ip_queue_xmit,
1680 	.send_check	   = tcp_v4_send_check,
1681 	.rebuild_header	   = inet_sk_rebuild_header,
1682 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1683 	.conn_request	   = tcp_v6_conn_request,
1684 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1685 	.net_header_len	   = sizeof(struct iphdr),
1686 	.setsockopt	   = ipv6_setsockopt,
1687 	.getsockopt	   = ipv6_getsockopt,
1688 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1689 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1690 #ifdef CONFIG_COMPAT
1691 	.compat_setsockopt = compat_ipv6_setsockopt,
1692 	.compat_getsockopt = compat_ipv6_getsockopt,
1693 #endif
1694 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1695 };
1696 
1697 #ifdef CONFIG_TCP_MD5SIG
1698 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1699 	.md5_lookup	=	tcp_v4_md5_lookup,
1700 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1701 	.md5_parse	=	tcp_v6_parse_md5_keys,
1702 };
1703 #endif
1704 
1705 /* NOTE: A lot of things are set to zero explicitly by the call to
1706  *       sk_alloc(), so they need not be done here.
1707  */
1708 static int tcp_v6_init_sock(struct sock *sk)
1709 {
1710 	struct inet_connection_sock *icsk = inet_csk(sk);
1711 
1712 	tcp_init_sock(sk);
1713 
1714 	icsk->icsk_af_ops = &ipv6_specific;
1715 
1716 #ifdef CONFIG_TCP_MD5SIG
1717 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1718 #endif
1719 
1720 	return 0;
1721 }
1722 
1723 static void tcp_v6_destroy_sock(struct sock *sk)
1724 {
1725 	tcp_v4_destroy_sock(sk);
1726 	inet6_destroy_sock(sk);
1727 }
1728 
1729 #ifdef CONFIG_PROC_FS
1730 /* Proc filesystem TCPv6 sock list dumping. */
1731 static void get_openreq6(struct seq_file *seq,
1732 			 const struct request_sock *req, int i)
1733 {
1734 	long ttd = req->rsk_timer.expires - jiffies;
1735 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1736 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1737 
1738 	if (ttd < 0)
1739 		ttd = 0;
1740 
1741 	seq_printf(seq,
1742 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1743 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1744 		   i,
1745 		   src->s6_addr32[0], src->s6_addr32[1],
1746 		   src->s6_addr32[2], src->s6_addr32[3],
1747 		   inet_rsk(req)->ir_num,
1748 		   dest->s6_addr32[0], dest->s6_addr32[1],
1749 		   dest->s6_addr32[2], dest->s6_addr32[3],
1750 		   ntohs(inet_rsk(req)->ir_rmt_port),
1751 		   TCP_SYN_RECV,
1752 		   0, 0, /* could print option size, but that is af dependent. */
1753 		   1,   /* timers active (only the expire timer) */
1754 		   jiffies_to_clock_t(ttd),
1755 		   req->num_timeout,
1756 		   from_kuid_munged(seq_user_ns(seq),
1757 				    sock_i_uid(req->rsk_listener)),
1758 		   0,  /* non standard timer */
1759 		   0, /* open_requests have no inode */
1760 		   0, req);
1761 }
1762 
1763 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1764 {
1765 	const struct in6_addr *dest, *src;
1766 	__u16 destp, srcp;
1767 	int timer_active;
1768 	unsigned long timer_expires;
1769 	const struct inet_sock *inet = inet_sk(sp);
1770 	const struct tcp_sock *tp = tcp_sk(sp);
1771 	const struct inet_connection_sock *icsk = inet_csk(sp);
1772 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1773 	int rx_queue;
1774 	int state;
1775 
1776 	dest  = &sp->sk_v6_daddr;
1777 	src   = &sp->sk_v6_rcv_saddr;
1778 	destp = ntohs(inet->inet_dport);
1779 	srcp  = ntohs(inet->inet_sport);
1780 
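	/* timer_active encodes which timer /proc/net/tcp6 reports:
	 * 1 retransmit (or loss probe/reo timeout), 2 keepalive,
	 * 4 zero-window probe, 0 none.
	 */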
1781 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1782 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1783 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1784 		timer_active	= 1;
1785 		timer_expires	= icsk->icsk_timeout;
1786 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1787 		timer_active	= 4;
1788 		timer_expires	= icsk->icsk_timeout;
1789 	} else if (timer_pending(&sp->sk_timer)) {
1790 		timer_active	= 2;
1791 		timer_expires	= sp->sk_timer.expires;
1792 	} else {
1793 		timer_active	= 0;
1794 		timer_expires = jiffies;
1795 	}
1796 
1797 	state = sk_state_load(sp);
1798 	if (state == TCP_LISTEN)
1799 		rx_queue = sp->sk_ack_backlog;
1800 	else
1801 		/* Because we don't lock the socket,
1802 		 * we might find a transient negative value.
1803 		 */
1804 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1805 
1806 	seq_printf(seq,
1807 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1808 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1809 		   i,
1810 		   src->s6_addr32[0], src->s6_addr32[1],
1811 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1812 		   dest->s6_addr32[0], dest->s6_addr32[1],
1813 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1814 		   state,
1815 		   tp->write_seq - tp->snd_una,
1816 		   rx_queue,
1817 		   timer_active,
1818 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1819 		   icsk->icsk_retransmits,
1820 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1821 		   icsk->icsk_probes_out,
1822 		   sock_i_ino(sp),
1823 		   refcount_read(&sp->sk_refcnt), sp,
1824 		   jiffies_to_clock_t(icsk->icsk_rto),
1825 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1826 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1827 		   tp->snd_cwnd,
1828 		   state == TCP_LISTEN ?
1829 			fastopenq->max_qlen :
1830 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1831 		   );
1832 }
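
/*
 * Decoding note (illustrative, values hypothetical): each address is
 * printed as four %08X words taken straight from s6_addr32[], so on a
 * little-endian host the bytes within each 32-bit group appear swapped
 * relative to the textual IPv6 address; ports are hex. A ::1 -> ::1
 * connection to port 8080 (0x1F90), in ESTABLISHED (state 01), could
 * render as:
 *
 *   0: 00000000000000000000000001000000:1F90
 *      00000000000000000000000001000000:D2E4 01 ...
 */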

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}
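
/*
 * Minimal userspace sketch (not kernel code) of consuming the rows
 * emitted above; names and buffer sizes here are illustrative only:
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[512], local[64], remote[64];
 *		unsigned int st;
 *		FILE *f = fopen("/proc/net/tcp6", "r");
 *
 *		if (!f)
 *			return 1;
 *		fgets(line, sizeof(line), f);	// skip header row
 *		while (fgets(line, sizeof(line), f))
 *			if (sscanf(line, " %*d: %63s %63s %x",
 *				   local, remote, &st) == 3)
 *				printf("%s -> %s state %u\n",
 *				       local, remote, st);
 *		fclose(f);
 *		return 0;
 *	}
 */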

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
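
/*
 * Note: the generic TCP handlers (tcp_close, tcp_sendmsg, ...) are shared
 * with IPv4; only hashing, connect and per-socket init/destroy differ.
 * obj_size, twsk_prot and rsk_prot size the slab caches for full,
 * timewait and request sockets respectively.
 */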

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =  tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}
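
/*
 * Purging timewait sockets from the batched exit hook lets one walk of
 * the global ehash table serve a whole batch of dying namespaces,
 * rather than repeating the walk once per netns in .exit.
 */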

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}
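
/*
 * Error unwinding above mirrors registration order: a failure at step N
 * jumps to a label that tears down steps N-1..1, so tcpv6_exit() below
 * can assume all three registrations succeeded.
 */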

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}