1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov: allows both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
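	/* struct ipv6_pinfo is the last member of struct tcp6_sock, so it sits
	 * at a constant offset from the start of the socket.
	 */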
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 struct dst_entry *dst = skb_dst(skb);
106
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110 rcu_assign_pointer(sk->sk_rx_dst, dst);
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->dest,
121 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 int addr_len)
132 {
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent the BPF program called below from accessing bytes that are out
135 * of the bounds specified by the user in addr_len.
136 */
137 if (addr_len < SIN6_LEN_RFC2133)
138 return -EINVAL;
139
140 sock_owned_by_me(sk);
141
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 int addr_len)
147 {
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
155 struct flowi6 fl6;
156 struct dst_entry *dst;
157 int addr_type;
158 int err;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161 if (addr_len < SIN6_LEN_RFC2133)
162 return -EINVAL;
163
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
166
167 memset(&fl6, 0, sizeof(fl6));
168
169 if (np->sndflow) {
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
176 return -EINVAL;
177 fl6_sock_release(flowlabel);
178 }
179 }
180
181 /*
182 * connect() to INADDR_ANY means loopback (BSD'ism).
183 */
184
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 &usin->sin6_addr);
189 else
190 usin->sin6_addr = in6addr_loopback;
191 }
192
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195 if (addr_type & IPV6_ADDR_MULTICAST)
196 return -ENETUNREACH;
197
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
202 * must coincide.
203 */
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 return -EINVAL;
206
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
208 }
209
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
212 return -EINVAL;
213 }
214
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
220 }
221
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
224
225 /*
226 * TCP over IPv4
227 */
228
229 if (addr_type & IPV6_ADDR_MAPPED) {
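		/* Save the extension header length so it can be restored if the
		 * IPv4 connect() below fails.
		 */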
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
232
233 if (__ipv6_only_sock(sk))
234 return -ENETUNREACH;
235
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242 if (sk_is_mptcp(sk))
243 mptcpv6_handle_mapped(sk, true);
244 sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248
249 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250
251 if (err) {
252 icsk->icsk_ext_hdr_len = exthdrlen;
253 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255 if (sk_is_mptcp(sk))
256 mptcpv6_handle_mapped(sk, false);
257 sk->sk_backlog_rcv = tcp_v6_do_rcv;
258 #ifdef CONFIG_TCP_MD5SIG
259 tp->af_specific = &tcp_sock_ipv6_specific;
260 #endif
261 goto failure;
262 }
263 np->saddr = sk->sk_v6_rcv_saddr;
264
265 return err;
266 }
267
268 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 saddr = &sk->sk_v6_rcv_saddr;
270
271 fl6.flowi6_proto = IPPROTO_TCP;
272 fl6.daddr = sk->sk_v6_daddr;
273 fl6.saddr = saddr ? *saddr : np->saddr;
274 fl6.flowi6_oif = sk->sk_bound_dev_if;
275 fl6.flowi6_mark = sk->sk_mark;
276 fl6.fl6_dport = usin->sin6_port;
277 fl6.fl6_sport = inet->inet_sport;
278 fl6.flowi6_uid = sk->sk_uid;
279
280 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
281 final_p = fl6_update_dst(&fl6, opt, &final);
282
283 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
284
285 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
286 if (IS_ERR(dst)) {
287 err = PTR_ERR(dst);
288 goto failure;
289 }
290
291 if (!saddr) {
292 saddr = &fl6.saddr;
293 sk->sk_v6_rcv_saddr = *saddr;
294 }
295
296 /* set the source address */
297 np->saddr = *saddr;
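	/* LOOPBACK4_IPV6 is a placeholder value: the IPv4 receive address is
	 * not meaningful for a native IPv6 socket.
	 */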
298 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
299
300 sk->sk_gso_type = SKB_GSO_TCPV6;
301 ip6_dst_store(sk, dst, NULL, NULL);
302
303 icsk->icsk_ext_hdr_len = 0;
304 if (opt)
305 icsk->icsk_ext_hdr_len = opt->opt_flen +
306 opt->opt_nflen;
307
308 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
309
310 inet->inet_dport = usin->sin6_port;
311
312 tcp_set_state(sk, TCP_SYN_SENT);
313 err = inet6_hash_connect(tcp_death_row, sk);
314 if (err)
315 goto late_failure;
316
317 sk_set_txhash(sk);
318
319 if (likely(!tp->repair)) {
320 if (!tp->write_seq)
321 WRITE_ONCE(tp->write_seq,
322 secure_tcpv6_seq(np->saddr.s6_addr32,
323 sk->sk_v6_daddr.s6_addr32,
324 inet->inet_sport,
325 inet->inet_dport));
326 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
327 np->saddr.s6_addr32,
328 sk->sk_v6_daddr.s6_addr32);
329 }
330
331 if (tcp_fastopen_defer_connect(sk, &err))
332 return err;
333 if (err)
334 goto late_failure;
335
336 err = tcp_connect(sk);
337 if (err)
338 goto late_failure;
339
340 return 0;
341
342 late_failure:
343 tcp_set_state(sk, TCP_CLOSE);
344 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
345 inet_reset_saddr(sk);
346 failure:
347 inet->inet_dport = 0;
348 sk->sk_route_caps = 0;
349 return err;
350 }
351
352 static void tcp_v6_mtu_reduced(struct sock *sk)
353 {
354 struct dst_entry *dst;
355 u32 mtu;
356
357 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
358 return;
359
360 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
361
362 /* Drop requests trying to increase our current mss.
363 * Check done in __ip6_rt_update_pmtu() is too late.
364 */
365 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
366 return;
367
368 dst = inet6_csk_update_pmtu(sk, mtu);
369 if (!dst)
370 return;
371
372 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
373 tcp_sync_mss(sk, dst_mtu(dst));
374 tcp_simple_retransmit(sk);
375 }
376 }
377
378 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
379 u8 type, u8 code, int offset, __be32 info)
380 {
381 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
382 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
383 struct net *net = dev_net(skb->dev);
384 struct request_sock *fastopen;
385 struct ipv6_pinfo *np;
386 struct tcp_sock *tp;
387 __u32 seq, snd_una;
388 struct sock *sk;
389 bool fatal;
390 int err;
391
392 sk = __inet6_lookup_established(net, &tcp_hashinfo,
393 &hdr->daddr, th->dest,
394 &hdr->saddr, ntohs(th->source),
395 skb->dev->ifindex, inet6_sdif(skb));
396
397 if (!sk) {
398 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
399 ICMP6_MIB_INERRORS);
400 return -ENOENT;
401 }
402
403 if (sk->sk_state == TCP_TIME_WAIT) {
404 inet_twsk_put(inet_twsk(sk));
405 return 0;
406 }
407 seq = ntohl(th->seq);
408 fatal = icmpv6_err_convert(type, code, &err);
409 if (sk->sk_state == TCP_NEW_SYN_RECV) {
410 tcp_req_err(sk, seq, fatal);
411 return 0;
412 }
413
414 bh_lock_sock(sk);
415 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
416 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
417
418 if (sk->sk_state == TCP_CLOSE)
419 goto out;
420
421 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
422 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
423 goto out;
424 }
425
426 tp = tcp_sk(sk);
427 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
428 fastopen = rcu_dereference(tp->fastopen_rsk);
429 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
430 if (sk->sk_state != TCP_LISTEN &&
431 !between(seq, snd_una, tp->snd_nxt)) {
432 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433 goto out;
434 }
435
436 np = tcp_inet6_sk(sk);
437
438 if (type == NDISC_REDIRECT) {
439 if (!sock_owned_by_user(sk)) {
440 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441
442 if (dst)
443 dst->ops->redirect(dst, sk, skb);
444 }
445 goto out;
446 }
447
448 if (type == ICMPV6_PKT_TOOBIG) {
449 u32 mtu = ntohl(info);
450
451 /* We are not interested in TCP_LISTEN and open_requests
452 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
453 * they should go through unfragmented).
454 */
455 if (sk->sk_state == TCP_LISTEN)
456 goto out;
457
458 if (!ip6_sk_accept_pmtu(sk))
459 goto out;
460
461 if (mtu < IPV6_MIN_MTU)
462 goto out;
463
464 WRITE_ONCE(tp->mtu_info, mtu);
465
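	/* If the socket is owned by the user, defer the update to
	 * tcp_release_cb(); sock_hold() keeps the socket alive until the
	 * deferred work runs.
	 */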
466 if (!sock_owned_by_user(sk))
467 tcp_v6_mtu_reduced(sk);
468 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469 &sk->sk_tsq_flags))
470 sock_hold(sk);
471 goto out;
472 }
473
474
475 /* Might be for a request_sock */
476 switch (sk->sk_state) {
477 case TCP_SYN_SENT:
478 case TCP_SYN_RECV:
479 /* Only in fast or simultaneous open. If a fast open socket is
480 * already accepted it is treated as a connected one below.
481 */
482 if (fastopen && !fastopen->sk)
483 break;
484
485 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486
487 if (!sock_owned_by_user(sk)) {
488 sk->sk_err = err;
489 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
490
491 tcp_done(sk);
492 } else
493 sk->sk_err_soft = err;
494 goto out;
495 case TCP_LISTEN:
496 break;
497 default:
498 /* Check whether this ICMP message allows reverting the backoff.
499 * (see RFC 6069)
500 */
501 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502 code == ICMPV6_NOROUTE)
503 tcp_ld_RTO_revert(sk, seq);
504 }
505
506 if (!sock_owned_by_user(sk) && np->recverr) {
507 sk->sk_err = err;
508 sk->sk_error_report(sk);
509 } else
510 sk->sk_err_soft = err;
511
512 out:
513 bh_unlock_sock(sk);
514 sock_put(sk);
515 return 0;
516 }
517
518
519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520 struct flowi *fl,
521 struct request_sock *req,
522 struct tcp_fastopen_cookie *foc,
523 enum tcp_synack_type synack_type,
524 struct sk_buff *syn_skb)
525 {
526 struct inet_request_sock *ireq = inet_rsk(req);
527 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528 struct ipv6_txoptions *opt;
529 struct flowi6 *fl6 = &fl->u.ip6;
530 struct sk_buff *skb;
531 int err = -ENOMEM;
532 u8 tclass;
533
534 /* First, grab a route. */
535 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536 IPPROTO_TCP)) == NULL)
537 goto done;
538
539 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540
541 if (skb) {
542 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543 &ireq->ir_v6_rmt_addr);
544
545 fl6->daddr = ireq->ir_v6_rmt_addr;
546 if (np->repflow && ireq->pktopts)
547 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548
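		/* With tcp_reflect_tos enabled, echo the DSCP of the SYN while
		 * keeping the listener's own ECN bits.
		 */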
549 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
550 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551 (np->tclass & INET_ECN_MASK) :
552 np->tclass;
553
554 if (!INET_ECN_is_capable(tclass) &&
555 tcp_bpf_ca_needs_ecn((struct sock *)req))
556 tclass |= INET_ECN_ECT_0;
557
558 rcu_read_lock();
559 opt = ireq->ipv6_opt;
560 if (!opt)
561 opt = rcu_dereference(np->opt);
562 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563 tclass, sk->sk_priority);
564 rcu_read_unlock();
565 err = net_xmit_eval(err);
566 }
567
568 done:
569 return err;
570 }
571
572
573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575 kfree(inet_rsk(req)->ipv6_opt);
576 kfree_skb(inet_rsk(req)->pktopts);
577 }
578
579 #ifdef CONFIG_TCP_MD5SIG
580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581 const struct in6_addr *addr,
582 int l3index)
583 {
584 return tcp_md5_do_lookup(sk, l3index,
585 (union tcp_md5_addr *)addr, AF_INET6);
586 }
587
588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589 const struct sock *addr_sk)
590 {
591 int l3index;
592
593 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594 addr_sk->sk_bound_dev_if);
595 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596 l3index);
597 }
598
599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600 sockptr_t optval, int optlen)
601 {
602 struct tcp_md5sig cmd;
603 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604 int l3index = 0;
605 u8 prefixlen;
606
607 if (optlen < sizeof(cmd))
608 return -EINVAL;
609
610 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
611 return -EFAULT;
612
613 if (sin6->sin6_family != AF_INET6)
614 return -EINVAL;
615
616 if (optname == TCP_MD5SIG_EXT &&
617 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
618 prefixlen = cmd.tcpm_prefixlen;
619 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
620 prefixlen > 32))
621 return -EINVAL;
622 } else {
623 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
624 }
625
626 if (optname == TCP_MD5SIG_EXT &&
627 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
628 struct net_device *dev;
629
630 rcu_read_lock();
631 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
632 if (dev && netif_is_l3_master(dev))
633 l3index = dev->ifindex;
634 rcu_read_unlock();
635
636 /* ok to reference set/not set outside of rcu;
637 * right now device MUST be an L3 master
638 */
639 if (!dev || !l3index)
640 return -EINVAL;
641 }
642
643 if (!cmd.tcpm_keylen) {
644 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
645 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
646 AF_INET, prefixlen,
647 l3index);
648 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
649 AF_INET6, prefixlen, l3index);
650 }
651
652 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
653 return -EINVAL;
654
655 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
656 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
657 AF_INET, prefixlen, l3index,
658 cmd.tcpm_key, cmd.tcpm_keylen,
659 GFP_KERNEL);
660
661 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
662 AF_INET6, prefixlen, l3index,
663 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
664 }
665
666 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
667 const struct in6_addr *daddr,
668 const struct in6_addr *saddr,
669 const struct tcphdr *th, int nbytes)
670 {
671 struct tcp6_pseudohdr *bp;
672 struct scatterlist sg;
673 struct tcphdr *_th;
674
675 bp = hp->scratch;
676 /* 1. TCP pseudo-header (RFC2460) */
677 bp->saddr = *saddr;
678 bp->daddr = *daddr;
679 bp->protocol = cpu_to_be32(IPPROTO_TCP);
680 bp->len = cpu_to_be32(nbytes);
681
682 _th = (struct tcphdr *)(bp + 1);
683 memcpy(_th, th, sizeof(*th));
684 _th->check = 0;
685
686 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
687 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
688 sizeof(*bp) + sizeof(*th));
689 return crypto_ahash_update(hp->md5_req);
690 }
691
692 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
693 const struct in6_addr *daddr, struct in6_addr *saddr,
694 const struct tcphdr *th)
695 {
696 struct tcp_md5sig_pool *hp;
697 struct ahash_request *req;
698
699 hp = tcp_get_md5sig_pool();
700 if (!hp)
701 goto clear_hash_noput;
702 req = hp->md5_req;
703
704 if (crypto_ahash_init(req))
705 goto clear_hash;
706 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
707 goto clear_hash;
708 if (tcp_md5_hash_key(hp, key))
709 goto clear_hash;
710 ahash_request_set_crypt(req, NULL, md5_hash, 0);
711 if (crypto_ahash_final(req))
712 goto clear_hash;
713
714 tcp_put_md5sig_pool();
715 return 0;
716
717 clear_hash:
718 tcp_put_md5sig_pool();
719 clear_hash_noput:
720 memset(md5_hash, 0, 16);
721 return 1;
722 }
723
724 static int tcp_v6_md5_hash_skb(char *md5_hash,
725 const struct tcp_md5sig_key *key,
726 const struct sock *sk,
727 const struct sk_buff *skb)
728 {
729 const struct in6_addr *saddr, *daddr;
730 struct tcp_md5sig_pool *hp;
731 struct ahash_request *req;
732 const struct tcphdr *th = tcp_hdr(skb);
733
734 if (sk) { /* valid for establish/request sockets */
735 saddr = &sk->sk_v6_rcv_saddr;
736 daddr = &sk->sk_v6_daddr;
737 } else {
738 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
739 saddr = &ip6h->saddr;
740 daddr = &ip6h->daddr;
741 }
742
743 hp = tcp_get_md5sig_pool();
744 if (!hp)
745 goto clear_hash_noput;
746 req = hp->md5_req;
747
748 if (crypto_ahash_init(req))
749 goto clear_hash;
750
751 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
752 goto clear_hash;
753 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
754 goto clear_hash;
755 if (tcp_md5_hash_key(hp, key))
756 goto clear_hash;
757 ahash_request_set_crypt(req, NULL, md5_hash, 0);
758 if (crypto_ahash_final(req))
759 goto clear_hash;
760
761 tcp_put_md5sig_pool();
762 return 0;
763
764 clear_hash:
765 tcp_put_md5sig_pool();
766 clear_hash_noput:
767 memset(md5_hash, 0, 16);
768 return 1;
769 }
770
771 #endif
772
773 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
774 const struct sk_buff *skb,
775 int dif, int sdif)
776 {
777 #ifdef CONFIG_TCP_MD5SIG
778 const __u8 *hash_location = NULL;
779 struct tcp_md5sig_key *hash_expected;
780 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
781 const struct tcphdr *th = tcp_hdr(skb);
782 int genhash, l3index;
783 u8 newhash[16];
784
785 /* If sdif is set, the packet ingressed via a device
786 * in an L3 domain, and dif is set to the l3mdev
787 */
788 l3index = sdif ? dif : 0;
789
790 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
791 hash_location = tcp_parse_md5sig_option(th);
792
793 /* We've parsed the options - do we have a hash? */
794 if (!hash_expected && !hash_location)
795 return false;
796
797 if (hash_expected && !hash_location) {
798 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
799 return true;
800 }
801
802 if (!hash_expected && hash_location) {
803 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
804 return true;
805 }
806
807 /* check the signature */
808 genhash = tcp_v6_md5_hash_skb(newhash,
809 hash_expected,
810 NULL, skb);
811
812 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
813 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
814 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
815 genhash ? "failed" : "mismatch",
816 &ip6h->saddr, ntohs(th->source),
817 &ip6h->daddr, ntohs(th->dest), l3index);
818 return true;
819 }
820 #endif
821 return false;
822 }
823
824 static void tcp_v6_init_req(struct request_sock *req,
825 const struct sock *sk_listener,
826 struct sk_buff *skb)
827 {
828 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
829 struct inet_request_sock *ireq = inet_rsk(req);
830 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
831
832 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
833 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
834
835 /* So that link locals have meaning */
836 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
837 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
838 ireq->ir_iif = tcp_v6_iif(skb);
839
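	/* Stash the SYN skb if the listener wants to see received IPv6
	 * options or ancillary data; it is cloned into the child socket in
	 * tcp_v6_syn_recv_sock().
	 */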
840 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
841 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
842 np->rxopt.bits.rxinfo ||
843 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
844 np->rxopt.bits.rxohlim || np->repflow)) {
845 refcount_inc(&skb->users);
846 ireq->pktopts = skb;
847 }
848 }
849
850 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
851 struct flowi *fl,
852 const struct request_sock *req)
853 {
854 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
855 }
856
857 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
858 .family = AF_INET6,
859 .obj_size = sizeof(struct tcp6_request_sock),
860 .rtx_syn_ack = tcp_rtx_synack,
861 .send_ack = tcp_v6_reqsk_send_ack,
862 .destructor = tcp_v6_reqsk_destructor,
863 .send_reset = tcp_v6_send_reset,
864 .syn_ack_timeout = tcp_syn_ack_timeout,
865 };
866
867 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
868 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
869 sizeof(struct ipv6hdr),
870 #ifdef CONFIG_TCP_MD5SIG
871 .req_md5_lookup = tcp_v6_md5_lookup,
872 .calc_md5_hash = tcp_v6_md5_hash_skb,
873 #endif
874 .init_req = tcp_v6_init_req,
875 #ifdef CONFIG_SYN_COOKIES
876 .cookie_init_seq = cookie_v6_init_sequence,
877 #endif
878 .route_req = tcp_v6_route_req,
879 .init_seq = tcp_v6_init_seq,
880 .init_ts_off = tcp_v6_init_ts_off,
881 .send_synack = tcp_v6_send_synack,
882 };
883
884 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
885 u32 ack, u32 win, u32 tsval, u32 tsecr,
886 int oif, struct tcp_md5sig_key *key, int rst,
887 u8 tclass, __be32 label, u32 priority)
888 {
889 const struct tcphdr *th = tcp_hdr(skb);
890 struct tcphdr *t1;
891 struct sk_buff *buff;
892 struct flowi6 fl6;
893 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
894 struct sock *ctl_sk = net->ipv6.tcp_sk;
895 unsigned int tot_len = sizeof(struct tcphdr);
896 struct dst_entry *dst;
897 __be32 *topt;
898 __u32 mark = 0;
899
900 if (tsecr)
901 tot_len += TCPOLEN_TSTAMP_ALIGNED;
902 #ifdef CONFIG_TCP_MD5SIG
903 if (key)
904 tot_len += TCPOLEN_MD5SIG_ALIGNED;
905 #endif
906
907 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
908 GFP_ATOMIC);
909 if (!buff)
910 return;
911
912 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
913
914 t1 = skb_push(buff, tot_len);
915 skb_reset_transport_header(buff);
916
917 /* Swap the send and the receive. */
918 memset(t1, 0, sizeof(*t1));
919 t1->dest = th->source;
920 t1->source = th->dest;
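	/* doff is expressed in 32-bit words. */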
921 t1->doff = tot_len / 4;
922 t1->seq = htonl(seq);
923 t1->ack_seq = htonl(ack);
924 t1->ack = !rst || !th->ack;
925 t1->rst = rst;
926 t1->window = htons(win);
927
928 topt = (__be32 *)(t1 + 1);
929
930 if (tsecr) {
931 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
932 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
933 *topt++ = htonl(tsval);
934 *topt++ = htonl(tsecr);
935 }
936
937 #ifdef CONFIG_TCP_MD5SIG
938 if (key) {
939 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
940 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
941 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
942 &ipv6_hdr(skb)->saddr,
943 &ipv6_hdr(skb)->daddr, t1);
944 }
945 #endif
946
947 memset(&fl6, 0, sizeof(fl6));
948 fl6.daddr = ipv6_hdr(skb)->saddr;
949 fl6.saddr = ipv6_hdr(skb)->daddr;
950 fl6.flowlabel = label;
951
952 buff->ip_summed = CHECKSUM_PARTIAL;
953 buff->csum = 0;
954
955 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
956
957 fl6.flowi6_proto = IPPROTO_TCP;
958 if (rt6_need_strict(&fl6.daddr) && !oif)
959 fl6.flowi6_oif = tcp_v6_iif(skb);
960 else {
961 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
962 oif = skb->skb_iif;
963
964 fl6.flowi6_oif = oif;
965 }
966
967 if (sk) {
968 if (sk->sk_state == TCP_TIME_WAIT) {
969 mark = inet_twsk(sk)->tw_mark;
970 /* autoflowlabel relies on buff->hash */
971 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
972 PKT_HASH_TYPE_L4);
973 } else {
974 mark = sk->sk_mark;
975 }
976 buff->tstamp = tcp_transmit_time(sk);
977 }
978 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
979 fl6.fl6_dport = t1->dest;
980 fl6.fl6_sport = t1->source;
981 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
982 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
983
984 /* Pass a socket to ip6_dst_lookup even if it is for an RST;
985 * the underlying function will use it to retrieve the network
986 * namespace.
987 */
988 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
989 if (!IS_ERR(dst)) {
990 skb_dst_set(buff, dst);
991 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
992 tclass & ~INET_ECN_MASK, priority);
993 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
994 if (rst)
995 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
996 return;
997 }
998
999 kfree_skb(buff);
1000 }
1001
1002 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1003 {
1004 const struct tcphdr *th = tcp_hdr(skb);
1005 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1006 u32 seq = 0, ack_seq = 0;
1007 struct tcp_md5sig_key *key = NULL;
1008 #ifdef CONFIG_TCP_MD5SIG
1009 const __u8 *hash_location = NULL;
1010 unsigned char newhash[16];
1011 int genhash;
1012 struct sock *sk1 = NULL;
1013 #endif
1014 __be32 label = 0;
1015 u32 priority = 0;
1016 struct net *net;
1017 int oif = 0;
1018
1019 if (th->rst)
1020 return;
1021
1022 /* If sk is not NULL, it means we did a successful lookup and the incoming
1023 * route had to be correct. The prequeue might have dropped our dst.
1024 */
1025 if (!sk && !ipv6_unicast_destination(skb))
1026 return;
1027
1028 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1029 #ifdef CONFIG_TCP_MD5SIG
1030 rcu_read_lock();
1031 hash_location = tcp_parse_md5sig_option(th);
1032 if (sk && sk_fullsock(sk)) {
1033 int l3index;
1034
1035 /* If sdif is set, the packet ingressed via a device
1036 * in an L3 domain, and inet_iif is set to it.
1037 */
1038 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1039 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1040 } else if (hash_location) {
1041 int dif = tcp_v6_iif_l3_slave(skb);
1042 int sdif = tcp_v6_sdif(skb);
1043 int l3index;
1044
1045 /*
1046 * The active side is lost. Try to find the listening socket through
1047 * the source port, and then find the md5 key through that socket.
1048 * We do not lose any security here:
1049 * the incoming packet is checked against the md5 hash of the found key,
1050 * and no RST is generated if the md5 hash does not match.
1051 */
1052 sk1 = inet6_lookup_listener(net,
1053 &tcp_hashinfo, NULL, 0,
1054 &ipv6h->saddr,
1055 th->source, &ipv6h->daddr,
1056 ntohs(th->source), dif, sdif);
1057 if (!sk1)
1058 goto out;
1059
1060 /* If sdif is set, the packet ingressed via a device
1061 * in an L3 domain, and dif is set to it.
1062 */
1063 l3index = tcp_v6_sdif(skb) ? dif : 0;
1064
1065 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1066 if (!key)
1067 goto out;
1068
1069 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1070 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1071 goto out;
1072 }
1073 #endif
1074
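	/* Per RFC 793: if the incoming segment carried an ACK, the RST takes
	 * its sequence number from that ACK; otherwise it ACKs the segment's
	 * sequence space (SYN and FIN each count for one).
	 */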
1075 if (th->ack)
1076 seq = ntohl(th->ack_seq);
1077 else
1078 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1079 (th->doff << 2);
1080
1081 if (sk) {
1082 oif = sk->sk_bound_dev_if;
1083 if (sk_fullsock(sk)) {
1084 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1085
1086 trace_tcp_send_reset(sk, skb);
1087 if (np->repflow)
1088 label = ip6_flowlabel(ipv6h);
1089 priority = sk->sk_priority;
1090 }
1091 if (sk->sk_state == TCP_TIME_WAIT) {
1092 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1093 priority = inet_twsk(sk)->tw_priority;
1094 }
1095 } else {
1096 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1097 label = ip6_flowlabel(ipv6h);
1098 }
1099
1100 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1101 ipv6_get_dsfield(ipv6h), label, priority);
1102
1103 #ifdef CONFIG_TCP_MD5SIG
1104 out:
1105 rcu_read_unlock();
1106 #endif
1107 }
1108
1109 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1110 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1111 struct tcp_md5sig_key *key, u8 tclass,
1112 __be32 label, u32 priority)
1113 {
1114 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1115 tclass, label, priority);
1116 }
1117
1118 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1119 {
1120 struct inet_timewait_sock *tw = inet_twsk(sk);
1121 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1122
1123 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1124 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1125 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1126 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1127 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1128
1129 inet_twsk_put(tw);
1130 }
1131
1132 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1133 struct request_sock *req)
1134 {
1135 int l3index;
1136
1137 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1138
1139 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1140 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1141 */
1142 /* RFC 7323 2.3
1143 * The window field (SEG.WND) of every outgoing segment, with the
1144 * exception of <SYN> segments, MUST be right-shifted by
1145 * Rcv.Wind.Shift bits:
1146 */
1147 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1148 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1149 tcp_rsk(req)->rcv_nxt,
1150 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1151 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1152 req->ts_recent, sk->sk_bound_dev_if,
1153 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1154 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1155 }
1156
1157
1158 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1159 {
1160 #ifdef CONFIG_SYN_COOKIES
1161 const struct tcphdr *th = tcp_hdr(skb);
1162
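	/* Syncookies are validated on the ACK that completes the handshake,
	 * so only non-SYN segments are checked here.
	 */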
1163 if (!th->syn)
1164 sk = cookie_v6_check(sk, skb);
1165 #endif
1166 return sk;
1167 }
1168
1169 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1170 struct tcphdr *th, u32 *cookie)
1171 {
1172 u16 mss = 0;
1173 #ifdef CONFIG_SYN_COOKIES
1174 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1175 &tcp_request_sock_ipv6_ops, sk, th);
1176 if (mss) {
1177 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1178 tcp_synq_overflow(sk);
1179 }
1180 #endif
1181 return mss;
1182 }
1183
1184 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1185 {
1186 if (skb->protocol == htons(ETH_P_IP))
1187 return tcp_v4_conn_request(sk, skb);
1188
1189 if (!ipv6_unicast_destination(skb))
1190 goto drop;
1191
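	/* Reject SYNs with an IPv4-mapped source address: count them as a
	 * header error and drop them without sending a reset.
	 */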
1192 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1193 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1194 return 0;
1195 }
1196
1197 return tcp_conn_request(&tcp6_request_sock_ops,
1198 &tcp_request_sock_ipv6_ops, sk, skb);
1199
1200 drop:
1201 tcp_listendrop(sk);
1202 return 0; /* don't send reset */
1203 }
1204
1205 static void tcp_v6_restore_cb(struct sk_buff *skb)
1206 {
1207 /* We need to move header back to the beginning if xfrm6_policy_check()
1208 * and tcp_v6_fill_cb() are going to be called again.
1209 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1210 */
1211 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1212 sizeof(struct inet6_skb_parm));
1213 }
1214
1215 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1216 struct request_sock *req,
1217 struct dst_entry *dst,
1218 struct request_sock *req_unhash,
1219 bool *own_req)
1220 {
1221 struct inet_request_sock *ireq;
1222 struct ipv6_pinfo *newnp;
1223 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1224 struct ipv6_txoptions *opt;
1225 struct inet_sock *newinet;
1226 bool found_dup_sk = false;
1227 struct tcp_sock *newtp;
1228 struct sock *newsk;
1229 #ifdef CONFIG_TCP_MD5SIG
1230 struct tcp_md5sig_key *key;
1231 int l3index;
1232 #endif
1233 struct flowi6 fl6;
1234
1235 if (skb->protocol == htons(ETH_P_IP)) {
1236 /*
1237 * v6 mapped
1238 */
1239
1240 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1241 req_unhash, own_req);
1242
1243 if (!newsk)
1244 return NULL;
1245
1246 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1247
1248 newinet = inet_sk(newsk);
1249 newnp = tcp_inet6_sk(newsk);
1250 newtp = tcp_sk(newsk);
1251
1252 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1253
1254 newnp->saddr = newsk->sk_v6_rcv_saddr;
1255
1256 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1257 if (sk_is_mptcp(newsk))
1258 mptcpv6_handle_mapped(newsk, true);
1259 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1260 #ifdef CONFIG_TCP_MD5SIG
1261 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1262 #endif
1263
1264 newnp->ipv6_mc_list = NULL;
1265 newnp->ipv6_ac_list = NULL;
1266 newnp->ipv6_fl_list = NULL;
1267 newnp->pktoptions = NULL;
1268 newnp->opt = NULL;
1269 newnp->mcast_oif = inet_iif(skb);
1270 newnp->mcast_hops = ip_hdr(skb)->ttl;
1271 newnp->rcv_flowinfo = 0;
1272 if (np->repflow)
1273 newnp->flow_label = 0;
1274
1275 /*
1276 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1277 * here, tcp_create_openreq_child now does this for us, see the comment in
1278 * that function for the gory details. -acme
1279 */
1280
1281 /* It is a tricky place. Until this moment the IPv4 tcp code
1282 worked with the IPv6 icsk.icsk_af_ops.
1283 Sync it now.
1284 */
1285 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1286
1287 return newsk;
1288 }
1289
1290 ireq = inet_rsk(req);
1291
1292 if (sk_acceptq_is_full(sk))
1293 goto out_overflow;
1294
1295 if (!dst) {
1296 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1297 if (!dst)
1298 goto out;
1299 }
1300
1301 newsk = tcp_create_openreq_child(sk, req, skb);
1302 if (!newsk)
1303 goto out_nonewsk;
1304
1305 /*
1306 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1307 * count here, tcp_create_openreq_child now does this for us, see the
1308 * comment in that function for the gory details. -acme
1309 */
1310
1311 newsk->sk_gso_type = SKB_GSO_TCPV6;
1312 ip6_dst_store(newsk, dst, NULL, NULL);
1313 inet6_sk_rx_dst_set(newsk, skb);
1314
1315 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1316
1317 newtp = tcp_sk(newsk);
1318 newinet = inet_sk(newsk);
1319 newnp = tcp_inet6_sk(newsk);
1320
1321 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1322
1323 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1324 newnp->saddr = ireq->ir_v6_loc_addr;
1325 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1326 newsk->sk_bound_dev_if = ireq->ir_iif;
1327
1328 /* Now IPv6 options...
1329
1330 First: no IPv4 options.
1331 */
1332 newinet->inet_opt = NULL;
1333 newnp->ipv6_mc_list = NULL;
1334 newnp->ipv6_ac_list = NULL;
1335 newnp->ipv6_fl_list = NULL;
1336
1337 /* Clone RX bits */
1338 newnp->rxopt.all = np->rxopt.all;
1339
1340 newnp->pktoptions = NULL;
1341 newnp->opt = NULL;
1342 newnp->mcast_oif = tcp_v6_iif(skb);
1343 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1344 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1345 if (np->repflow)
1346 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1347
1348 /* Set ToS of the new socket based upon the value of incoming SYN.
1349 * ECT bits are set later in tcp_init_transfer().
1350 */
1351 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1352 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1353
1354 /* Clone native IPv6 options from listening socket (if any)
1355
1356 Yes, keeping a reference count would be much more clever,
1357 but we do one more thing here: we reattach the optmem
1358 to newsk.
1359 */
1360 opt = ireq->ipv6_opt;
1361 if (!opt)
1362 opt = rcu_dereference(np->opt);
1363 if (opt) {
1364 opt = ipv6_dup_options(newsk, opt);
1365 RCU_INIT_POINTER(newnp->opt, opt);
1366 }
1367 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1368 if (opt)
1369 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1370 opt->opt_flen;
1371
1372 tcp_ca_openreq_child(newsk, dst);
1373
1374 tcp_sync_mss(newsk, dst_mtu(dst));
1375 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1376
1377 tcp_initialize_rcv_mss(newsk);
1378
1379 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1380 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1381
1382 #ifdef CONFIG_TCP_MD5SIG
1383 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1384
1385 /* Copy over the MD5 key from the original socket */
1386 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1387 if (key) {
1388 /* We're using one, so create a matching key
1389 * on the newsk structure. If we fail to get
1390 * memory, then we end up not copying the key
1391 * across. Shucks.
1392 */
1393 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1394 AF_INET6, 128, l3index, key->key, key->keylen,
1395 sk_gfp_mask(sk, GFP_ATOMIC));
1396 }
1397 #endif
1398
1399 if (__inet_inherit_port(sk, newsk) < 0) {
1400 inet_csk_prepare_forced_close(newsk);
1401 tcp_done(newsk);
1402 goto out;
1403 }
1404 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1405 &found_dup_sk);
1406 if (*own_req) {
1407 tcp_move_syn(newtp, req);
1408
1409 /* Clone pktoptions received with SYN, if we own the req */
1410 if (ireq->pktopts) {
1411 newnp->pktoptions = skb_clone(ireq->pktopts,
1412 sk_gfp_mask(sk, GFP_ATOMIC));
1413 consume_skb(ireq->pktopts);
1414 ireq->pktopts = NULL;
1415 if (newnp->pktoptions) {
1416 tcp_v6_restore_cb(newnp->pktoptions);
1417 skb_set_owner_r(newnp->pktoptions, newsk);
1418 }
1419 }
1420 } else {
1421 if (!req_unhash && found_dup_sk) {
1422 /* This code path should only be executed in the
1423 * syncookie case
1424 */
1425 bh_unlock_sock(newsk);
1426 sock_put(newsk);
1427 newsk = NULL;
1428 }
1429 }
1430
1431 return newsk;
1432
1433 out_overflow:
1434 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1435 out_nonewsk:
1436 dst_release(dst);
1437 out:
1438 tcp_listendrop(sk);
1439 return NULL;
1440 }
1441
1442 /* The socket must have its spinlock held when we get
1443 * here, unless it is a TCP_LISTEN socket.
1444 *
1445 * We have a potential double-lock case here, so even when
1446 * doing backlog processing we use the BH locking scheme.
1447 * This is because we cannot sleep with the original spinlock
1448 * held.
1449 */
1450 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1451 {
1452 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1453 struct sk_buff *opt_skb = NULL;
1454 struct tcp_sock *tp;
1455
1456 /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1457 goes to the IPv4 receive handler and is backlogged.
1458 From the backlog it always ends up here. Kerboom...
1459 Fortunately, tcp_rcv_established and rcv_established
1460 handle them correctly, but that is not the case with
1461 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1462 */
1463
1464 if (skb->protocol == htons(ETH_P_IP))
1465 return tcp_v4_do_rcv(sk, skb);
1466
1467 /*
1468 * socket locking is here for SMP purposes as backlog rcv
1469 * is currently called with bh processing disabled.
1470 */
1471
1472 /* Do Stevens' IPV6_PKTOPTIONS.
1473
1474 Yes, guys, it is the only place in our code where we
1475 may make it not affect IPv4.
1476 The rest of the code is protocol independent,
1477 and I do not like the idea of uglifying IPv4.
1478
1479 Actually, the whole idea behind IPV6_PKTOPTIONS
1480 does not look very well thought out. For now we latch the
1481 options received in the last packet enqueued
1482 by tcp. Feel free to propose a better solution.
1483 --ANK (980728)
1484 */
1485 if (np->rxopt.all)
1486 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1487
1488 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1489 struct dst_entry *dst;
1490
1491 dst = rcu_dereference_protected(sk->sk_rx_dst,
1492 lockdep_sock_is_held(sk));
1493
1494 sock_rps_save_rxhash(sk, skb);
1495 sk_mark_napi_id(sk, skb);
1496 if (dst) {
1497 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1498 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1499 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1500 dst_release(dst);
1501 }
1502 }
1503
1504 tcp_rcv_established(sk, skb);
1505 if (opt_skb)
1506 goto ipv6_pktoptions;
1507 return 0;
1508 }
1509
1510 if (tcp_checksum_complete(skb))
1511 goto csum_err;
1512
1513 if (sk->sk_state == TCP_LISTEN) {
1514 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1515
1516 if (!nsk)
1517 goto discard;
1518
1519 if (nsk != sk) {
1520 if (tcp_child_process(sk, nsk, skb))
1521 goto reset;
1522 if (opt_skb)
1523 __kfree_skb(opt_skb);
1524 return 0;
1525 }
1526 } else
1527 sock_rps_save_rxhash(sk, skb);
1528
1529 if (tcp_rcv_state_process(sk, skb))
1530 goto reset;
1531 if (opt_skb)
1532 goto ipv6_pktoptions;
1533 return 0;
1534
1535 reset:
1536 tcp_v6_send_reset(sk, skb);
1537 discard:
1538 if (opt_skb)
1539 __kfree_skb(opt_skb);
1540 kfree_skb(skb);
1541 return 0;
1542 csum_err:
1543 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1544 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1545 goto discard;
1546
1547
1548 ipv6_pktoptions:
1549 /* Do you ask, what is it?
1550
1551 1. skb was enqueued by tcp.
1552 2. skb is added to tail of read queue, rather than out of order.
1553 3. socket is not in passive state.
1554 4. Finally, it really contains options, which user wants to receive.
1555 */
1556 tp = tcp_sk(sk);
1557 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1558 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1559 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1560 np->mcast_oif = tcp_v6_iif(opt_skb);
1561 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1562 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1563 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1564 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1565 if (np->repflow)
1566 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1567 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1568 skb_set_owner_r(opt_skb, sk);
1569 tcp_v6_restore_cb(opt_skb);
1570 opt_skb = xchg(&np->pktoptions, opt_skb);
1571 } else {
1572 __kfree_skb(opt_skb);
1573 opt_skb = xchg(&np->pktoptions, NULL);
1574 }
1575 }
1576
1577 kfree_skb(opt_skb);
1578 return 0;
1579 }
1580
1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582 const struct tcphdr *th)
1583 {
1584 /* This is tricky: we move IP6CB at its correct location into
1585 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1586 * _decode_session6() uses IP6CB().
1587 * barrier() makes sure compiler won't play aliasing games.
1588 */
1589 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1590 sizeof(struct inet6_skb_parm));
1591 barrier();
1592
1593 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
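	/* SYN and FIN each consume one unit of sequence space, so they are
	 * counted into end_seq.
	 */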
1594 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595 skb->len - th->doff*4);
1596 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600 TCP_SKB_CB(skb)->sacked = 0;
1601 TCP_SKB_CB(skb)->has_rxtstamp =
1602 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1603 }
1604
1605 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1606 {
1607 struct sk_buff *skb_to_free;
1608 int sdif = inet6_sdif(skb);
1609 int dif = inet6_iif(skb);
1610 const struct tcphdr *th;
1611 const struct ipv6hdr *hdr;
1612 bool refcounted;
1613 struct sock *sk;
1614 int ret;
1615 struct net *net = dev_net(skb->dev);
1616
1617 if (skb->pkt_type != PACKET_HOST)
1618 goto discard_it;
1619
1620 /*
1621 * Count it even if it's bad.
1622 */
1623 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1624
1625 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1626 goto discard_it;
1627
1628 th = (const struct tcphdr *)skb->data;
1629
1630 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1631 goto bad_packet;
1632 if (!pskb_may_pull(skb, th->doff*4))
1633 goto discard_it;
1634
1635 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1636 goto csum_error;
1637
1638 th = (const struct tcphdr *)skb->data;
1639 hdr = ipv6_hdr(skb);
1640
1641 lookup:
1642 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1643 th->source, th->dest, inet6_iif(skb), sdif,
1644 &refcounted);
1645 if (!sk)
1646 goto no_tcp_socket;
1647
1648 process:
1649 if (sk->sk_state == TCP_TIME_WAIT)
1650 goto do_time_wait;
1651
1652 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1653 struct request_sock *req = inet_reqsk(sk);
1654 bool req_stolen = false;
1655 struct sock *nsk;
1656
1657 sk = req->rsk_listener;
1658 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1659 sk_drops_add(sk, skb);
1660 reqsk_put(req);
1661 goto discard_it;
1662 }
1663 if (tcp_checksum_complete(skb)) {
1664 reqsk_put(req);
1665 goto csum_error;
1666 }
1667 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1668 inet_csk_reqsk_queue_drop_and_put(sk, req);
1669 goto lookup;
1670 }
1671 sock_hold(sk);
1672 refcounted = true;
1673 nsk = NULL;
1674 if (!tcp_filter(sk, skb)) {
1675 th = (const struct tcphdr *)skb->data;
1676 hdr = ipv6_hdr(skb);
1677 tcp_v6_fill_cb(skb, hdr, th);
1678 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1679 }
1680 if (!nsk) {
1681 reqsk_put(req);
1682 if (req_stolen) {
1683 /* Another cpu got exclusive access to req
1684 * and created a full blown socket.
1685 * Try to feed this packet to this socket
1686 * instead of discarding it.
1687 */
1688 tcp_v6_restore_cb(skb);
1689 sock_put(sk);
1690 goto lookup;
1691 }
1692 goto discard_and_relse;
1693 }
1694 if (nsk == sk) {
1695 reqsk_put(req);
1696 tcp_v6_restore_cb(skb);
1697 } else if (tcp_child_process(sk, nsk, skb)) {
1698 tcp_v6_send_reset(nsk, skb);
1699 goto discard_and_relse;
1700 } else {
1701 sock_put(sk);
1702 return 0;
1703 }
1704 }
1705 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1706 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707 goto discard_and_relse;
1708 }
1709
1710 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1711 goto discard_and_relse;
1712
1713 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1714 goto discard_and_relse;
1715
1716 if (tcp_filter(sk, skb))
1717 goto discard_and_relse;
1718 th = (const struct tcphdr *)skb->data;
1719 hdr = ipv6_hdr(skb);
1720 tcp_v6_fill_cb(skb, hdr, th);
1721
1722 skb->dev = NULL;
1723
1724 if (sk->sk_state == TCP_LISTEN) {
1725 ret = tcp_v6_do_rcv(sk, skb);
1726 goto put_and_return;
1727 }
1728
1729 sk_incoming_cpu_update(sk);
1730
1731 bh_lock_sock_nested(sk);
1732 tcp_segs_in(tcp_sk(sk), skb);
1733 ret = 0;
1734 if (!sock_owned_by_user(sk)) {
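		/* Detach any cached rx skb now; it is freed after
		 * bh_unlock_sock() below to keep the freeing out of the locked
		 * section.
		 */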
1735 skb_to_free = sk->sk_rx_skb_cache;
1736 sk->sk_rx_skb_cache = NULL;
1737 ret = tcp_v6_do_rcv(sk, skb);
1738 } else {
1739 if (tcp_add_backlog(sk, skb))
1740 goto discard_and_relse;
1741 skb_to_free = NULL;
1742 }
1743 bh_unlock_sock(sk);
1744 if (skb_to_free)
1745 __kfree_skb(skb_to_free);
1746 put_and_return:
1747 if (refcounted)
1748 sock_put(sk);
1749 return ret ? -1 : 0;
1750
1751 no_tcp_socket:
1752 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1753 goto discard_it;
1754
1755 tcp_v6_fill_cb(skb, hdr, th);
1756
1757 if (tcp_checksum_complete(skb)) {
1758 csum_error:
1759 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1760 bad_packet:
1761 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1762 } else {
1763 tcp_v6_send_reset(NULL, skb);
1764 }
1765
1766 discard_it:
1767 kfree_skb(skb);
1768 return 0;
1769
1770 discard_and_relse:
1771 sk_drops_add(sk, skb);
1772 if (refcounted)
1773 sock_put(sk);
1774 goto discard_it;
1775
1776 do_time_wait:
1777 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1778 inet_twsk_put(inet_twsk(sk));
1779 goto discard_it;
1780 }
1781
1782 tcp_v6_fill_cb(skb, hdr, th);
1783
1784 if (tcp_checksum_complete(skb)) {
1785 inet_twsk_put(inet_twsk(sk));
1786 goto csum_error;
1787 }
1788
1789 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1790 case TCP_TW_SYN:
1791 {
1792 struct sock *sk2;
1793
1794 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1795 skb, __tcp_hdrlen(th),
1796 &ipv6_hdr(skb)->saddr, th->source,
1797 &ipv6_hdr(skb)->daddr,
1798 ntohs(th->dest),
1799 tcp_v6_iif_l3_slave(skb),
1800 sdif);
1801 if (sk2) {
1802 struct inet_timewait_sock *tw = inet_twsk(sk);
1803 inet_twsk_deschedule_put(tw);
1804 sk = sk2;
1805 tcp_v6_restore_cb(skb);
1806 refcounted = false;
1807 goto process;
1808 }
1809 }
1810 /* to ACK */
1811 fallthrough;
1812 case TCP_TW_ACK:
1813 tcp_v6_timewait_ack(sk, skb);
1814 break;
1815 case TCP_TW_RST:
1816 tcp_v6_send_reset(sk, skb);
1817 inet_twsk_deschedule_put(inet_twsk(sk));
1818 goto discard_it;
1819 case TCP_TW_SUCCESS:
1820 ;
1821 }
1822 goto discard_it;
1823 }
1824
1825 void tcp_v6_early_demux(struct sk_buff *skb)
1826 {
1827 const struct ipv6hdr *hdr;
1828 const struct tcphdr *th;
1829 struct sock *sk;
1830
1831 if (skb->pkt_type != PACKET_HOST)
1832 return;
1833
1834 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1835 return;
1836
1837 hdr = ipv6_hdr(skb);
1838 th = tcp_hdr(skb);
1839
1840 if (th->doff < sizeof(struct tcphdr) / 4)
1841 return;
1842
1843 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1844 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1845 &hdr->saddr, th->source,
1846 &hdr->daddr, ntohs(th->dest),
1847 inet6_iif(skb), inet6_sdif(skb));
1848 if (sk) {
1849 skb->sk = sk;
1850 skb->destructor = sock_edemux;
1851 if (sk_fullsock(sk)) {
1852 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1853
1854 if (dst)
1855 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1856 if (dst &&
1857 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1858 skb_dst_set_noref(skb, dst);
1859 }
1860 }
1861 }
1862
1863 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1864 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1865 .twsk_unique = tcp_twsk_unique,
1866 .twsk_destructor = tcp_twsk_destructor,
1867 };
1868
1869 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1870 {
1871 struct ipv6_pinfo *np = inet6_sk(sk);
1872
1873 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1874 }
1875
1876 const struct inet_connection_sock_af_ops ipv6_specific = {
1877 .queue_xmit = inet6_csk_xmit,
1878 .send_check = tcp_v6_send_check,
1879 .rebuild_header = inet6_sk_rebuild_header,
1880 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1881 .conn_request = tcp_v6_conn_request,
1882 .syn_recv_sock = tcp_v6_syn_recv_sock,
1883 .net_header_len = sizeof(struct ipv6hdr),
1884 .net_frag_header_len = sizeof(struct frag_hdr),
1885 .setsockopt = ipv6_setsockopt,
1886 .getsockopt = ipv6_getsockopt,
1887 .addr2sockaddr = inet6_csk_addr2sockaddr,
1888 .sockaddr_len = sizeof(struct sockaddr_in6),
1889 .mtu_reduced = tcp_v6_mtu_reduced,
1890 };
1891
1892 #ifdef CONFIG_TCP_MD5SIG
1893 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1894 .md5_lookup = tcp_v6_md5_lookup,
1895 .calc_md5_hash = tcp_v6_md5_hash_skb,
1896 .md5_parse = tcp_v6_parse_md5_keys,
1897 };
1898 #endif
1899
1900 /*
1901 * TCP over IPv4 via INET6 API
1902 */
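/* ipv6_mapped is installed on an AF_INET6 socket once it connects to
 * an IPv4-mapped address (::ffff:a.b.c.d), so further traffic goes
 * through the IPv4 code paths while the socket keeps its IPv6
 * sockaddr format.
 */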
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
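/* Three row formats are emitted below, mirroring /proc/net/tcp:
 * get_openreq6() for embryonic request sockets, get_tcp6_sock() for
 * full sockets and get_timewait6_sock() for TIME_WAIT sockets.
 */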
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

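/* Layer-4 protocol hooks: tcpv6_protocol routes incoming IPPROTO_TCP
 * packets to tcp_v6_rcv()/tcp_v6_err(), while tcpv6_protosw binds
 * sockets created via socket(AF_INET6, SOCK_STREAM, 0) to tcpv6_prot.
 */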
static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

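/* When a batch of network namespaces is torn down, make sure no IPv6
 * TIME_WAIT sockets outlive their netns.
 */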
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

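/* Module init: register the inet6 protocol handler, the protosw entry,
 * the per-netns control socket and MPTCP's IPv6 support, in that
 * order; on failure each step is unwound in reverse.
 */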
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}