1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
tcp_inet6_sk(const struct sock * sk)96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 struct dst_entry *dst = skb_dst(skb);
106
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110 rcu_assign_pointer(sk->sk_rx_dst, dst);
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 }
114 }
115
tcp_v6_init_seq(const struct sk_buff * skb)116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->dest,
121 tcp_hdr(skb)->source);
122 }
123
tcp_v6_init_ts_off(const struct net * net,const struct sk_buff * skb)124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
tcp_v6_pre_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 int addr_len)
132 {
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent BPF program called below from accessing bytes that are out
135 * of the bound specified by user in addr_len.
136 */
137 if (addr_len < SIN6_LEN_RFC2133)
138 return -EINVAL;
139
140 sock_owned_by_me(sk);
141
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
tcp_v6_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 int addr_len)
147 {
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
155 struct flowi6 fl6;
156 struct dst_entry *dst;
157 int addr_type;
158 int err;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161 if (addr_len < SIN6_LEN_RFC2133)
162 return -EINVAL;
163
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
166
167 memset(&fl6, 0, sizeof(fl6));
168
169 if (np->sndflow) {
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
176 return -EINVAL;
177 fl6_sock_release(flowlabel);
178 }
179 }
180
181 /*
182 * connect() to INADDR_ANY means loopback (BSD'ism).
183 */
184
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 &usin->sin6_addr);
189 else
190 usin->sin6_addr = in6addr_loopback;
191 }
192
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195 if (addr_type & IPV6_ADDR_MULTICAST)
196 return -ENETUNREACH;
197
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
202 * must coincide.
203 */
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 return -EINVAL;
206
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
208 }
209
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
212 return -EINVAL;
213 }
214
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
220 }
221
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
224
225 /*
226 * TCP over IPv4
227 */
228
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
232
233 if (__ipv6_only_sock(sk))
234 return -ENETUNREACH;
235
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242 if (sk_is_mptcp(sk))
243 mptcpv6_handle_mapped(sk, true);
244 sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248
249 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250
251 if (err) {
252 icsk->icsk_ext_hdr_len = exthdrlen;
253 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255 if (sk_is_mptcp(sk))
256 mptcpv6_handle_mapped(sk, false);
257 sk->sk_backlog_rcv = tcp_v6_do_rcv;
258 #ifdef CONFIG_TCP_MD5SIG
259 tp->af_specific = &tcp_sock_ipv6_specific;
260 #endif
261 goto failure;
262 }
263 np->saddr = sk->sk_v6_rcv_saddr;
264
265 return err;
266 }
267
268 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269 saddr = &sk->sk_v6_rcv_saddr;
270
271 fl6.flowi6_proto = IPPROTO_TCP;
272 fl6.daddr = sk->sk_v6_daddr;
273 fl6.saddr = saddr ? *saddr : np->saddr;
274 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
275 fl6.flowi6_oif = sk->sk_bound_dev_if;
276 fl6.flowi6_mark = sk->sk_mark;
277 fl6.fl6_dport = usin->sin6_port;
278 fl6.fl6_sport = inet->inet_sport;
279 fl6.flowi6_uid = sk->sk_uid;
280
281 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282 final_p = fl6_update_dst(&fl6, opt, &final);
283
284 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
285
286 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
287 if (IS_ERR(dst)) {
288 err = PTR_ERR(dst);
289 goto failure;
290 }
291
292 if (!saddr) {
293 saddr = &fl6.saddr;
294 sk->sk_v6_rcv_saddr = *saddr;
295 }
296
297 /* set the source address */
298 np->saddr = *saddr;
299 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300
301 sk->sk_gso_type = SKB_GSO_TCPV6;
302 ip6_dst_store(sk, dst, NULL, NULL);
303
304 icsk->icsk_ext_hdr_len = 0;
305 if (opt)
306 icsk->icsk_ext_hdr_len = opt->opt_flen +
307 opt->opt_nflen;
308
309 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310
311 inet->inet_dport = usin->sin6_port;
312
313 tcp_set_state(sk, TCP_SYN_SENT);
314 err = inet6_hash_connect(tcp_death_row, sk);
315 if (err)
316 goto late_failure;
317
318 sk_set_txhash(sk);
319
320 if (likely(!tp->repair)) {
321 if (!tp->write_seq)
322 WRITE_ONCE(tp->write_seq,
323 secure_tcpv6_seq(np->saddr.s6_addr32,
324 sk->sk_v6_daddr.s6_addr32,
325 inet->inet_sport,
326 inet->inet_dport));
327 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
328 np->saddr.s6_addr32,
329 sk->sk_v6_daddr.s6_addr32);
330 }
331
332 if (tcp_fastopen_defer_connect(sk, &err))
333 return err;
334 if (err)
335 goto late_failure;
336
337 err = tcp_connect(sk);
338 if (err)
339 goto late_failure;
340
341 return 0;
342
343 late_failure:
344 tcp_set_state(sk, TCP_CLOSE);
345 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
346 inet_reset_saddr(sk);
347 failure:
348 inet->inet_dport = 0;
349 sk->sk_route_caps = 0;
350 return err;
351 }
352
tcp_v6_mtu_reduced(struct sock * sk)353 static void tcp_v6_mtu_reduced(struct sock *sk)
354 {
355 struct dst_entry *dst;
356 u32 mtu;
357
358 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359 return;
360
361 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362
363 /* Drop requests trying to increase our current mss.
364 * Check done in __ip6_rt_update_pmtu() is too late.
365 */
366 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367 return;
368
369 dst = inet6_csk_update_pmtu(sk, mtu);
370 if (!dst)
371 return;
372
373 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374 tcp_sync_mss(sk, dst_mtu(dst));
375 tcp_simple_retransmit(sk);
376 }
377 }
378
/* ICMPv6 error handler for TCP.
 *
 * The code reads skb->data as an IPv6 header and skb->data + @offset as a
 * TCP header; those two headers are used to find the established socket
 * the error refers to. @type, @code and @info describe the ICMPv6 message.
 *
 * Returns -ENOENT when no socket matches, 0 in every other case.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *ip6h_in = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th_in = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&ip6h_in->daddr, th_in->dest,
					&ip6h_in->saddr, ntohs(th_in->source),
					skb->dev->ifindex, inet6_sdif(skb));
	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	/* Nothing to report on a time-wait socket; just put it. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}

	seq = ntohl(th_in->seq);
	fatal = icmpv6_err_convert(type, code, &err);

	/* Request sockets are handled entirely by tcp_req_err(). */
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;

	/* Except for listeners, the quoted sequence number must fall
	 * inside the range of data currently in flight.
	 */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst;

			dst = __sk_dst_check(sk, np->dst_cookie);
			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* TCP_LISTEN and open requests are of no interest here
		 * (SYN-ACKs sent out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Handle the reduction now when the socket is free,
		 * otherwise flag it as deferred; a reference is taken
		 * only when this call is the one that set the flag.
		 */
		if (!sock_owned_by_user(sk)) {
			tcp_v6_mtu_reduced(sk);
		} else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					     &sk->sk_tsq_flags)) {
			sock_hold(sk);
		}
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th_in->dest, ntohl(info),
				(u8 *)th_in);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			/* Wake people up to see the error
			 * (see connect in sock.c)
			 */
			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* Check whether this ICMP message allows the backoff to
		 * be reverted (see RFC 6069).
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	/* Hard error only if the socket is free and asked for errors. */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else {
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
518
519
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)520 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
521 struct flowi *fl,
522 struct request_sock *req,
523 struct tcp_fastopen_cookie *foc,
524 enum tcp_synack_type synack_type,
525 struct sk_buff *syn_skb)
526 {
527 struct inet_request_sock *ireq = inet_rsk(req);
528 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
529 struct ipv6_txoptions *opt;
530 struct flowi6 *fl6 = &fl->u.ip6;
531 struct sk_buff *skb;
532 int err = -ENOMEM;
533 u8 tclass;
534
535 /* First, grab a route. */
536 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
537 IPPROTO_TCP)) == NULL)
538 goto done;
539
540 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
541
542 if (skb) {
543 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
544 &ireq->ir_v6_rmt_addr);
545
546 fl6->daddr = ireq->ir_v6_rmt_addr;
547 if (np->repflow && ireq->pktopts)
548 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
549
550 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
551 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
552 (np->tclass & INET_ECN_MASK) :
553 np->tclass;
554
555 if (!INET_ECN_is_capable(tclass) &&
556 tcp_bpf_ca_needs_ecn((struct sock *)req))
557 tclass |= INET_ECN_ECT_0;
558
559 rcu_read_lock();
560 opt = ireq->ipv6_opt;
561 if (!opt)
562 opt = rcu_dereference(np->opt);
563 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
564 tclass, sk->sk_priority);
565 rcu_read_unlock();
566 err = net_xmit_eval(err);
567 }
568
569 done:
570 return err;
571 }
572
573
tcp_v6_reqsk_destructor(struct request_sock * req)574 static void tcp_v6_reqsk_destructor(struct request_sock *req)
575 {
576 kfree(inet_rsk(req)->ipv6_opt);
577 kfree_skb(inet_rsk(req)->pktopts);
578 }
579
580 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)581 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
582 const struct in6_addr *addr,
583 int l3index)
584 {
585 return tcp_md5_do_lookup(sk, l3index,
586 (union tcp_md5_addr *)addr, AF_INET6);
587 }
588
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)589 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
590 const struct sock *addr_sk)
591 {
592 int l3index;
593
594 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
595 addr_sk->sk_bound_dev_if);
596 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
597 l3index);
598 }
599
/* Handle the TCP_MD5SIG / TCP_MD5SIG_EXT socket options for IPv6 sockets.
 *
 * Copies a struct tcp_md5sig from @optval and then adds the key, or
 * deletes it when tcpm_keylen is zero. A v4-mapped peer address is stored
 * as an AF_INET key built from the last 32 bits of the address.
 *
 * Returns -EINVAL for a short @optlen, a non-AF_INET6 address, an
 * out-of-range prefix length, an ifindex that is not an L3 master device
 * or an over-long key; -EFAULT when the copy fails; otherwise the result
 * of tcp_md5_do_add() / tcp_md5_do_del().
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_md5_addr *peer;
	bool v4mapped;
	int l3index = 0;
	u8 prefixlen;
	int family;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	/* Decide once how the key is going to be stored. */
	v4mapped = ipv6_addr_v4mapped(&sin6->sin6_addr);
	if (v4mapped) {
		peer = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
		family = AF_INET;
	} else {
		peer = (union tcp_md5_addr *)&sin6->sin6_addr;
		family = AF_INET6;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    (cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX)) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (v4mapped && prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = v4mapped ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    (cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX)) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* Testing dev for set/not set outside of rcu is fine; it
		 * is not dereferenced. Right now the device MUST be an L3
		 * master.
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	/* A zero key length requests deletion. */
	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, peer, family, prefixlen, l3index);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, peer, family, prefixlen, l3index,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
666
tcp_v6_md5_hash_headers(struct tcp_md5sig_pool * hp,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)667 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668 const struct in6_addr *daddr,
669 const struct in6_addr *saddr,
670 const struct tcphdr *th, int nbytes)
671 {
672 struct tcp6_pseudohdr *bp;
673 struct scatterlist sg;
674 struct tcphdr *_th;
675
676 bp = hp->scratch;
677 /* 1. TCP pseudo-header (RFC2460) */
678 bp->saddr = *saddr;
679 bp->daddr = *daddr;
680 bp->protocol = cpu_to_be32(IPPROTO_TCP);
681 bp->len = cpu_to_be32(nbytes);
682
683 _th = (struct tcphdr *)(bp + 1);
684 memcpy(_th, th, sizeof(*th));
685 _th->check = 0;
686
687 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689 sizeof(*bp) + sizeof(*th));
690 return crypto_ahash_update(hp->md5_req);
691 }
692
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)693 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694 const struct in6_addr *daddr, struct in6_addr *saddr,
695 const struct tcphdr *th)
696 {
697 struct tcp_md5sig_pool *hp;
698 struct ahash_request *req;
699
700 hp = tcp_get_md5sig_pool();
701 if (!hp)
702 goto clear_hash_noput;
703 req = hp->md5_req;
704
705 if (crypto_ahash_init(req))
706 goto clear_hash;
707 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708 goto clear_hash;
709 if (tcp_md5_hash_key(hp, key))
710 goto clear_hash;
711 ahash_request_set_crypt(req, NULL, md5_hash, 0);
712 if (crypto_ahash_final(req))
713 goto clear_hash;
714
715 tcp_put_md5sig_pool();
716 return 0;
717
718 clear_hash:
719 tcp_put_md5sig_pool();
720 clear_hash_noput:
721 memset(md5_hash, 0, 16);
722 return 1;
723 }
724
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)725 static int tcp_v6_md5_hash_skb(char *md5_hash,
726 const struct tcp_md5sig_key *key,
727 const struct sock *sk,
728 const struct sk_buff *skb)
729 {
730 const struct in6_addr *saddr, *daddr;
731 struct tcp_md5sig_pool *hp;
732 struct ahash_request *req;
733 const struct tcphdr *th = tcp_hdr(skb);
734
735 if (sk) { /* valid for establish/request sockets */
736 saddr = &sk->sk_v6_rcv_saddr;
737 daddr = &sk->sk_v6_daddr;
738 } else {
739 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 saddr = &ip6h->saddr;
741 daddr = &ip6h->daddr;
742 }
743
744 hp = tcp_get_md5sig_pool();
745 if (!hp)
746 goto clear_hash_noput;
747 req = hp->md5_req;
748
749 if (crypto_ahash_init(req))
750 goto clear_hash;
751
752 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753 goto clear_hash;
754 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755 goto clear_hash;
756 if (tcp_md5_hash_key(hp, key))
757 goto clear_hash;
758 ahash_request_set_crypt(req, NULL, md5_hash, 0);
759 if (crypto_ahash_final(req))
760 goto clear_hash;
761
762 tcp_put_md5sig_pool();
763 return 0;
764
765 clear_hash:
766 tcp_put_md5sig_pool();
767 clear_hash_noput:
768 memset(md5_hash, 0, 16);
769 return 1;
770 }
771
772 #endif
773
tcp_v6_inbound_md5_hash(const struct sock * sk,const struct sk_buff * skb,int dif,int sdif)774 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
775 const struct sk_buff *skb,
776 int dif, int sdif)
777 {
778 #ifdef CONFIG_TCP_MD5SIG
779 const __u8 *hash_location = NULL;
780 struct tcp_md5sig_key *hash_expected;
781 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
782 const struct tcphdr *th = tcp_hdr(skb);
783 int genhash, l3index;
784 u8 newhash[16];
785
786 /* sdif set, means packet ingressed via a device
787 * in an L3 domain and dif is set to the l3mdev
788 */
789 l3index = sdif ? dif : 0;
790
791 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
792 hash_location = tcp_parse_md5sig_option(th);
793
794 /* We've parsed the options - do we have a hash? */
795 if (!hash_expected && !hash_location)
796 return false;
797
798 if (hash_expected && !hash_location) {
799 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
800 return true;
801 }
802
803 if (!hash_expected && hash_location) {
804 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
805 return true;
806 }
807
808 /* check the signature */
809 genhash = tcp_v6_md5_hash_skb(newhash,
810 hash_expected,
811 NULL, skb);
812
813 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
814 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
815 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
816 genhash ? "failed" : "mismatch",
817 &ip6h->saddr, ntohs(th->source),
818 &ip6h->daddr, ntohs(th->dest), l3index);
819 return true;
820 }
821 #endif
822 return false;
823 }
824
tcp_v6_init_req(struct request_sock * req,const struct sock * sk_listener,struct sk_buff * skb)825 static void tcp_v6_init_req(struct request_sock *req,
826 const struct sock *sk_listener,
827 struct sk_buff *skb)
828 {
829 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
830 struct inet_request_sock *ireq = inet_rsk(req);
831 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
832
833 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
834 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
835
836 /* So that link locals have meaning */
837 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
838 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
839 ireq->ir_iif = tcp_v6_iif(skb);
840
841 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
842 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
843 np->rxopt.bits.rxinfo ||
844 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
845 np->rxopt.bits.rxohlim || np->repflow)) {
846 refcount_inc(&skb->users);
847 ireq->pktopts = skb;
848 }
849 }
850
tcp_v6_route_req(const struct sock * sk,struct flowi * fl,const struct request_sock * req)851 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
852 struct flowi *fl,
853 const struct request_sock *req)
854 {
855 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
856 }
857
858 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
859 .family = AF_INET6,
860 .obj_size = sizeof(struct tcp6_request_sock),
861 .rtx_syn_ack = tcp_rtx_synack,
862 .send_ack = tcp_v6_reqsk_send_ack,
863 .destructor = tcp_v6_reqsk_destructor,
864 .send_reset = tcp_v6_send_reset,
865 .syn_ack_timeout = tcp_syn_ack_timeout,
866 };
867
868 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
869 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
870 sizeof(struct ipv6hdr),
871 #ifdef CONFIG_TCP_MD5SIG
872 .req_md5_lookup = tcp_v6_md5_lookup,
873 .calc_md5_hash = tcp_v6_md5_hash_skb,
874 #endif
875 .init_req = tcp_v6_init_req,
876 #ifdef CONFIG_SYN_COOKIES
877 .cookie_init_seq = cookie_v6_init_sequence,
878 #endif
879 .route_req = tcp_v6_route_req,
880 .init_seq = tcp_v6_init_seq,
881 .init_ts_off = tcp_v6_init_ts_off,
882 .send_synack = tcp_v6_send_synack,
883 };
884
/* Build and send a bare TCP segment (an ACK, or an RST when @rst is set) in
 * reply to @skb, using the per-netns control socket for transmission.
 *
 * @sk may be NULL; it may also be a time-wait socket, which the code
 * handles separately. @seq, @ack and @win fill the TCP header; a timestamp
 * option is added when @tsecr is non-zero and an MD5 option when @key is
 * non-NULL (with CONFIG_TCP_MD5SIG). @oif, @tclass, @label and @priority
 * steer routing and the IPv6 header.
 *
 * Failures (no memory, no route) are silent: nothing is sent.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	const struct sock *lookup_sk;
	struct sk_buff *reply;
	struct dst_entry *dst;
	struct tcphdr *rth;
	struct flowi6 fl6;
	__be32 *optp;
	__u32 mark = 0;

	/* Size of the TCP header including the options to be emitted. */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	reply = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			  GFP_ATOMIC);
	if (!reply)
		return;

	skb_reserve(reply, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	rth = skb_push(reply, tot_len);
	skb_reset_transport_header(reply);

	/* Swap the send and the receive. */
	memset(rth, 0, sizeof(*rth));
	rth->dest = th->source;
	rth->source = th->dest;
	rth->doff = tot_len / 4;
	rth->seq = htonl(seq);
	rth->ack_seq = htonl(ack);
	rth->ack = !rst || !th->ack;
	rth->rst = rst;
	rth->window = htons(win);

	/* Options start right behind the fixed header. */
	optp = (__be32 *)(rth + 1);

	if (tsecr) {
		*optp++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*optp++ = htonl(tsval);
		*optp++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*optp++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)optp, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, rth);
	}
#endif

	/* The reply flows in the opposite direction of the trigger. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	reply->ip_summed = CHECKSUM_PARTIAL;
	reply->csum = 0;

	__tcp_v6_send_check(reply, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif) {
		fl6.flowi6_oif = tcp_v6_iif(skb);
	} else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(reply, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		reply->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = rth->dest;
	fl6.fl6_sport = rth->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Route on behalf of the caller's socket unless there is none or
	 * it is a time-wait socket; then the control socket is used.
	 * Passing sk lets its xfrm_policy be referred to.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		lookup_sk = sk;
	else
		lookup_sk = ctl_sk;

	dst = ip6_dst_lookup_flow(net, lookup_sk, &fl6, NULL);
	if (IS_ERR(dst)) {
		kfree_skb(reply);
		return;
	}

	skb_dst_set(reply, dst);
	ip6_xmit(ctl_sk, reply, &fl6, fl6.flowi6_mark, NULL,
		 tclass & ~INET_ECN_MASK, priority);
	TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	if (rst)
		TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
}
1005
/*
 * Reply to @skb with a TCP RST.
 *
 * @sk:  socket the segment was matched to, or NULL when the caller found
 *       none.  Full sockets and TIME_WAIT sockets are both handled below.
 * @skb: the incoming segment being answered.
 *
 * Nothing is sent when the incoming segment itself has RST set, or when
 * there is no socket and the destination is not a unicast address.
 *
 * With CONFIG_TCP_MD5SIG the whole body runs under rcu_read_lock().  If the
 * segment carries an MD5 option and @sk is not a full socket, a listener is
 * looked up and the reset is only generated when that listener has a key
 * for the peer and the segment's signature verifies against it.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	/* Never answer a reset with a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	/* Without a socket, the namespace comes from the skb's dst device. */
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find a listening socket
		 * through the source port, and then find the md5 key through
		 * that listening socket.
		 * We do not lose security here:
		 * the incoming packet is checked against the md5 hash computed
		 * with the key we find; no RST is generated if the hash
		 * doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	/* If the segment has ACK set, the reset's sequence number is taken
	 * from its ack_seq.  Otherwise seq stays 0 and ack_seq is set to the
	 * segment's seq plus SYN/FIN flags plus skb->len minus the TCP header
	 * length.
	 */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			/* Reflect the peer's flow label only if the socket asked for it. */
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		/* For a TIME_WAIT socket, use the label and priority saved
		 * in the timewait sock.
		 */
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		/* No socket: flow label reflection is governed by the sysctl. */
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
1112
/*
 * Send a bare ACK in reply to @skb.
 *
 * Thin wrapper around tcp_v6_send_response(): every argument is forwarded
 * unchanged, with 0 in the slot where tcp_v6_send_reset() passes 1.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}
1121
tcp_v6_timewait_ack(struct sock * sk,struct sk_buff * skb)1122 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1123 {
1124 struct inet_timewait_sock *tw = inet_twsk(sk);
1125 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1126
1127 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1128 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1129 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1130 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1131 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1132
1133 inet_twsk_put(tw);
1134 }
1135
tcp_v6_reqsk_send_ack(const struct sock * sk,struct sk_buff * skb,struct request_sock * req)1136 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1137 struct request_sock *req)
1138 {
1139 int l3index;
1140
1141 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1142
1143 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1144 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1145 */
1146 /* RFC 7323 2.3
1147 * The window field (SEG.WND) of every outgoing segment, with the
1148 * exception of <SYN> segments, MUST be right-shifted by
1149 * Rcv.Wind.Shift bits:
1150 */
1151 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1152 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1153 tcp_rsk(req)->rcv_nxt,
1154 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1155 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1156 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1157 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1158 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1159 }
1160
1161
/*
 * With CONFIG_SYN_COOKIES, hand every non-SYN segment to cookie_v6_check()
 * and return whatever socket it yields; SYN segments, and all segments when
 * the option is disabled, get @sk back unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1172
tcp_v6_get_syncookie(struct sock * sk,struct ipv6hdr * iph,struct tcphdr * th,u32 * cookie)1173 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1174 struct tcphdr *th, u32 *cookie)
1175 {
1176 u16 mss = 0;
1177 #ifdef CONFIG_SYN_COOKIES
1178 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1179 &tcp_request_sock_ipv6_ops, sk, th);
1180 if (mss) {
1181 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1182 tcp_synq_overflow(sk);
1183 }
1184 #endif
1185 return mss;
1186 }
1187
tcp_v6_conn_request(struct sock * sk,struct sk_buff * skb)1188 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1189 {
1190 if (skb->protocol == htons(ETH_P_IP))
1191 return tcp_v4_conn_request(sk, skb);
1192
1193 if (!ipv6_unicast_destination(skb))
1194 goto drop;
1195
1196 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1197 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1198 return 0;
1199 }
1200
1201 return tcp_conn_request(&tcp6_request_sock_ops,
1202 &tcp_request_sock_ipv6_ops, sk, skb);
1203
1204 drop:
1205 tcp_listendrop(sk);
1206 return 0; /* don't send reset */
1207 }
1208
tcp_v6_restore_cb(struct sk_buff * skb)1209 static void tcp_v6_restore_cb(struct sk_buff *skb)
1210 {
1211 /* We need to move header back to the beginning if xfrm6_policy_check()
1212 * and tcp_v6_fill_cb() are going to be called again.
1213 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1214 */
1215 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1216 sizeof(struct inet6_skb_parm));
1217 }
1218
/*
 * Create the child socket for connection request @req on listener @sk.
 *
 * @sk:         listening socket.
 * @skb:        incoming segment that triggers creation of the child.
 * @req:        request sock describing the pending connection.
 * @dst:        route for the child, or NULL to have one looked up here.
 * @req_unhash: converted with req_to_sk() and handed to
 *              inet_ehash_nolisten() when the child is hashed.
 * @own_req:    on the native IPv6 path, set from the return value of
 *              inet_ehash_nolisten().
 *
 * Segments that arrived as IPv4 are delegated to tcp_v4_syn_recv_sock();
 * the resulting socket is then given a copy of the listener's ipv6_pinfo
 * and switched to the v4-mapped operations.
 *
 * Returns the new socket, or NULL on failure.  The native IPv6 failure
 * paths count a listen drop on @sk.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		/* Start from a copy of the listener's IPv6 state; the
		 * per-socket pointers copied here are cleared just below.
		 */
		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 tcp code
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	/* Start from a copy of the listener's IPv6 state; the per-socket
	 * pointers copied here are cleared further down.
	 */
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	/* opt is re-tested because ipv6_dup_options() has replaced it. */
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

	/* The error labels fall through: overflow accounting, then dst
	 * release, then the listen-drop accounting.
	 */
out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
1442
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * @sk:  socket the segment was matched to.
 * @skb: the segment.  It is either passed on to the TCP state machine or
 *       freed on the reset/discard paths below.
 *
 * Always returns 0 for IPv6 segments; IPv4 segments return whatever
 * tcp_v4_do_rcv() returns.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	/* Keep a private clone only if the user enabled any rx option. */
	if (np->rxopt.all)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Drop the cached rx dst when the packet arrived on a
			 * different interface or the dst no longer validates.
			 */
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		/* A different socket came back: hand the segment to it. */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either latch the clone as the new pktoptions or clear the
		 * latch; in both cases opt_skb ends up holding the previous
		 * pktoptions skb, which is freed below.
		 */
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
1580
tcp_v6_fill_cb(struct sk_buff * skb,const struct ipv6hdr * hdr,const struct tcphdr * th)1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582 const struct tcphdr *th)
1583 {
1584 /* This is tricky: we move IP6CB at its correct location into
1585 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1586 * _decode_session6() uses IP6CB().
1587 * barrier() makes sure compiler won't play aliasing games.
1588 */
1589 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1590 sizeof(struct inet6_skb_parm));
1591 barrier();
1592
1593 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1594 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595 skb->len - th->doff*4);
1596 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600 TCP_SKB_CB(skb)->sacked = 0;
1601 TCP_SKB_CB(skb)->has_rxtstamp =
1602 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1603 }
1604
/*
 * Receive entry point for a TCP segment carried in an IPv6 packet.
 *
 * Validates the TCP header and checksum state, looks up the owning socket
 * and dispatches on its state: TIME_WAIT, TCP_NEW_SYN_RECV (request sock),
 * TCP_LISTEN, or any other state, in which case the segment is processed
 * directly or added to the backlog when the socket is owned by the user.
 *
 * Returns -1 when tcp_v6_do_rcv() returned non-zero for the segment,
 * 0 in every other case (including all drop paths).
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	/* doff counts 32-bit words; reject headers shorter than struct tcphdr. */
	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* Reload the header pointers after the pull above. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		/* From here on sk is the request's listener. */
		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		/* Listener is no longer listening: drop the request and
		 * redo the lookup.
		 */
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			/* Listener came back: continue below as for a
			 * listener, with the IPv6 cb restored.
			 */
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	/* Reload the header pointers after tcp_filter(). */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	/* Listeners are processed without taking the socket lock here. */
	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		/* Detach the cached rx skb under the lock; it is freed
		 * after the lock is released.
		 */
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		/* NOTE(review): this jump skips bh_unlock_sock() below, so
		 * tcp_add_backlog() presumably unlocks on failure - confirm.
		 */
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	/* The csum_error and bad_packet labels live inside this if-body so
	 * that earlier failure paths can jump straight to the matching
	 * counters; csum_error falls through into bad_packet.
	 */
	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		/* Look for a listener for this SYN; if one exists, retire
		 * the timewait sock and process the segment on the listener.
		 */
		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
1824
tcp_v6_early_demux(struct sk_buff * skb)1825 void tcp_v6_early_demux(struct sk_buff *skb)
1826 {
1827 const struct ipv6hdr *hdr;
1828 const struct tcphdr *th;
1829 struct sock *sk;
1830
1831 if (skb->pkt_type != PACKET_HOST)
1832 return;
1833
1834 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1835 return;
1836
1837 hdr = ipv6_hdr(skb);
1838 th = tcp_hdr(skb);
1839
1840 if (th->doff < sizeof(struct tcphdr) / 4)
1841 return;
1842
1843 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1844 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1845 &hdr->saddr, th->source,
1846 &hdr->daddr, ntohs(th->dest),
1847 inet6_iif(skb), inet6_sdif(skb));
1848 if (sk) {
1849 skb->sk = sk;
1850 skb->destructor = sock_edemux;
1851 if (sk_fullsock(sk)) {
1852 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1853
1854 if (dst)
1855 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1856 if (dst &&
1857 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1858 skb_dst_set_noref(skb, dst);
1859 }
1860 }
1861 }
1862
/* Timewait-sock operations for TCPv6: object size of the IPv6 timewait
 * sock plus the tcp_twsk_unique / tcp_twsk_destructor hooks.
 */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
1868
tcp_v6_send_check(struct sock * sk,struct sk_buff * skb)1869 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1870 {
1871 struct ipv6_pinfo *np = inet6_sk(sk);
1872
1873 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1874 }
1875
/* Connection-sock address-family operations for native IPv6 TCP sockets;
 * installed as icsk_af_ops by tcp_v6_init_sock().
 */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
1891
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for native IPv6 sockets; installed as af_specific
 * by tcp_v6_init_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1899
/*
 *	TCP over IPv4 via INET6 API
 *
 *	Installed as icsk_af_ops on v4-mapped child sockets by
 *	tcp_v6_syn_recv_sock().  Transmit, checksum, header rebuild, rx dst
 *	and MTU handling use the IPv4 helpers, while connection setup, socket
 *	options and sockaddr conversion use the IPv6 ones.
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
1917
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for v4-mapped sockets: IPv4 lookup and hashing, with
 * the IPv6 key parser.  Installed as af_specific on v4-mapped children by
 * tcp_v6_syn_recv_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1925
1926 /* NOTE: A lot of things set to zero explicitly by call to
1927 * sk_alloc() so need not be done here.
1928 */
tcp_v6_init_sock(struct sock * sk)1929 static int tcp_v6_init_sock(struct sock *sk)
1930 {
1931 struct inet_connection_sock *icsk = inet_csk(sk);
1932
1933 tcp_init_sock(sk);
1934
1935 icsk->icsk_af_ops = &ipv6_specific;
1936
1937 #ifdef CONFIG_TCP_MD5SIG
1938 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1939 #endif
1940
1941 return 0;
1942 }
1943
1944 #ifdef CONFIG_PROC_FS
1945 /* Proc filesystem TCPv6 sock list dumping. */
get_openreq6(struct seq_file * seq,const struct request_sock * req,int i)1946 static void get_openreq6(struct seq_file *seq,
1947 const struct request_sock *req, int i)
1948 {
1949 long ttd = req->rsk_timer.expires - jiffies;
1950 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1951 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1952
1953 if (ttd < 0)
1954 ttd = 0;
1955
1956 seq_printf(seq,
1957 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1958 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1959 i,
1960 src->s6_addr32[0], src->s6_addr32[1],
1961 src->s6_addr32[2], src->s6_addr32[3],
1962 inet_rsk(req)->ir_num,
1963 dest->s6_addr32[0], dest->s6_addr32[1],
1964 dest->s6_addr32[2], dest->s6_addr32[3],
1965 ntohs(inet_rsk(req)->ir_rmt_port),
1966 TCP_SYN_RECV,
1967 0, 0, /* could print option size, but that is af dependent. */
1968 1, /* timers active (only the expire timer) */
1969 jiffies_to_clock_t(ttd),
1970 req->num_timeout,
1971 from_kuid_munged(seq_user_ns(seq),
1972 sock_i_uid(req->rsk_listener)),
1973 0, /* non standard timer */
1974 0, /* open_requests have no inode */
1975 0, req);
1976 }
1977
get_tcp6_sock(struct seq_file * seq,struct sock * sp,int i)1978 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1979 {
1980 const struct in6_addr *dest, *src;
1981 __u16 destp, srcp;
1982 int timer_active;
1983 unsigned long timer_expires;
1984 const struct inet_sock *inet = inet_sk(sp);
1985 const struct tcp_sock *tp = tcp_sk(sp);
1986 const struct inet_connection_sock *icsk = inet_csk(sp);
1987 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1988 int rx_queue;
1989 int state;
1990
1991 dest = &sp->sk_v6_daddr;
1992 src = &sp->sk_v6_rcv_saddr;
1993 destp = ntohs(inet->inet_dport);
1994 srcp = ntohs(inet->inet_sport);
1995
1996 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1997 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1998 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1999 timer_active = 1;
2000 timer_expires = icsk->icsk_timeout;
2001 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2002 timer_active = 4;
2003 timer_expires = icsk->icsk_timeout;
2004 } else if (timer_pending(&sp->sk_timer)) {
2005 timer_active = 2;
2006 timer_expires = sp->sk_timer.expires;
2007 } else {
2008 timer_active = 0;
2009 timer_expires = jiffies;
2010 }
2011
2012 state = inet_sk_state_load(sp);
2013 if (state == TCP_LISTEN)
2014 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2015 else
2016 /* Because we don't lock the socket,
2017 * we might find a transient negative value.
2018 */
2019 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2020 READ_ONCE(tp->copied_seq), 0);
2021
2022 seq_printf(seq,
2023 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2024 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2025 i,
2026 src->s6_addr32[0], src->s6_addr32[1],
2027 src->s6_addr32[2], src->s6_addr32[3], srcp,
2028 dest->s6_addr32[0], dest->s6_addr32[1],
2029 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2030 state,
2031 READ_ONCE(tp->write_seq) - tp->snd_una,
2032 rx_queue,
2033 timer_active,
2034 jiffies_delta_to_clock_t(timer_expires - jiffies),
2035 icsk->icsk_retransmits,
2036 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2037 icsk->icsk_probes_out,
2038 sock_i_ino(sp),
2039 refcount_read(&sp->sk_refcnt), sp,
2040 jiffies_to_clock_t(icsk->icsk_rto),
2041 jiffies_to_clock_t(icsk->icsk_ack.ato),
2042 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2043 tp->snd_cwnd,
2044 state == TCP_LISTEN ?
2045 fastopenq->max_qlen :
2046 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2047 );
2048 }
2049
get_timewait6_sock(struct seq_file * seq,struct inet_timewait_sock * tw,int i)2050 static void get_timewait6_sock(struct seq_file *seq,
2051 struct inet_timewait_sock *tw, int i)
2052 {
2053 long delta = tw->tw_timer.expires - jiffies;
2054 const struct in6_addr *dest, *src;
2055 __u16 destp, srcp;
2056
2057 dest = &tw->tw_v6_daddr;
2058 src = &tw->tw_v6_rcv_saddr;
2059 destp = ntohs(tw->tw_dport);
2060 srcp = ntohs(tw->tw_sport);
2061
2062 seq_printf(seq,
2063 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2064 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2065 i,
2066 src->s6_addr32[0], src->s6_addr32[1],
2067 src->s6_addr32[2], src->s6_addr32[3], srcp,
2068 dest->s6_addr32[0], dest->s6_addr32[1],
2069 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2070 tw->tw_substate, 0, 0,
2071 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2072 refcount_read(&tw->tw_refcnt), tw);
2073 }
2074
tcp6_seq_show(struct seq_file * seq,void * v)2075 static int tcp6_seq_show(struct seq_file *seq, void *v)
2076 {
2077 struct tcp_iter_state *st;
2078 struct sock *sk = v;
2079
2080 if (v == SEQ_START_TOKEN) {
2081 seq_puts(seq,
2082 " sl "
2083 "local_address "
2084 "remote_address "
2085 "st tx_queue rx_queue tr tm->when retrnsmt"
2086 " uid timeout inode\n");
2087 goto out;
2088 }
2089 st = seq->private;
2090
2091 if (sk->sk_state == TCP_TIME_WAIT)
2092 get_timewait6_sock(seq, v, st->num);
2093 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2094 get_openreq6(seq, v, st->num);
2095 else
2096 get_tcp6_sock(seq, v, st->num);
2097 out:
2098 return 0;
2099 }
2100
2101 static const struct seq_operations tcp6_seq_ops = {
2102 .show = tcp6_seq_show,
2103 .start = tcp_seq_start,
2104 .next = tcp_seq_next,
2105 .stop = tcp_seq_stop,
2106 };
2107
2108 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2109 .family = AF_INET6,
2110 };
2111
tcp6_proc_init(struct net * net)2112 int __net_init tcp6_proc_init(struct net *net)
2113 {
2114 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2115 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2116 return -ENOMEM;
2117 return 0;
2118 }
2119
tcp6_proc_exit(struct net * net)2120 void tcp6_proc_exit(struct net *net)
2121 {
2122 remove_proc_entry("tcp6", net->proc_net);
2123 }
2124 #endif
2125
2126 struct proto tcpv6_prot = {
2127 .name = "TCPv6",
2128 .owner = THIS_MODULE,
2129 .close = tcp_close,
2130 .pre_connect = tcp_v6_pre_connect,
2131 .connect = tcp_v6_connect,
2132 .disconnect = tcp_disconnect,
2133 .accept = inet_csk_accept,
2134 .ioctl = tcp_ioctl,
2135 .init = tcp_v6_init_sock,
2136 .destroy = tcp_v4_destroy_sock,
2137 .shutdown = tcp_shutdown,
2138 .setsockopt = tcp_setsockopt,
2139 .getsockopt = tcp_getsockopt,
2140 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2141 .keepalive = tcp_set_keepalive,
2142 .recvmsg = tcp_recvmsg,
2143 .sendmsg = tcp_sendmsg,
2144 .sendpage = tcp_sendpage,
2145 .backlog_rcv = tcp_v6_do_rcv,
2146 .release_cb = tcp_release_cb,
2147 .hash = inet6_hash,
2148 .unhash = inet_unhash,
2149 .get_port = inet_csk_get_port,
2150 .enter_memory_pressure = tcp_enter_memory_pressure,
2151 .leave_memory_pressure = tcp_leave_memory_pressure,
2152 .stream_memory_free = tcp_stream_memory_free,
2153 .sockets_allocated = &tcp_sockets_allocated,
2154 .memory_allocated = &tcp_memory_allocated,
2155 .memory_pressure = &tcp_memory_pressure,
2156 .orphan_count = &tcp_orphan_count,
2157 .sysctl_mem = sysctl_tcp_mem,
2158 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2159 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2160 .max_header = MAX_TCP_HEADER,
2161 .obj_size = sizeof(struct tcp6_sock),
2162 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2163 .twsk_prot = &tcp6_timewait_sock_ops,
2164 .rsk_prot = &tcp6_request_sock_ops,
2165 .h.hashinfo = &tcp_hashinfo,
2166 .no_autobind = true,
2167 .diag_destroy = tcp_abort,
2168 };
2169 EXPORT_SYMBOL_GPL(tcpv6_prot);
2170
2171 static const struct inet6_protocol tcpv6_protocol = {
2172 .handler = tcp_v6_rcv,
2173 .err_handler = tcp_v6_err,
2174 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2175 };
2176
2177 static struct inet_protosw tcpv6_protosw = {
2178 .type = SOCK_STREAM,
2179 .protocol = IPPROTO_TCP,
2180 .prot = &tcpv6_prot,
2181 .ops = &inet6_stream_ops,
2182 .flags = INET_PROTOSW_PERMANENT |
2183 INET_PROTOSW_ICSK,
2184 };
2185
tcpv6_net_init(struct net * net)2186 static int __net_init tcpv6_net_init(struct net *net)
2187 {
2188 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2189 SOCK_RAW, IPPROTO_TCP, net);
2190 }
2191
tcpv6_net_exit(struct net * net)2192 static void __net_exit tcpv6_net_exit(struct net *net)
2193 {
2194 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2195 }
2196
tcpv6_net_exit_batch(struct list_head * net_exit_list)2197 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2198 {
2199 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2200 }
2201
2202 static struct pernet_operations tcpv6_net_ops = {
2203 .init = tcpv6_net_init,
2204 .exit = tcpv6_net_exit,
2205 .exit_batch = tcpv6_net_exit_batch,
2206 };
2207
tcpv6_init(void)2208 int __init tcpv6_init(void)
2209 {
2210 int ret;
2211
2212 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2213 if (ret)
2214 goto out;
2215
2216 /* register inet6 protocol */
2217 ret = inet6_register_protosw(&tcpv6_protosw);
2218 if (ret)
2219 goto out_tcpv6_protocol;
2220
2221 ret = register_pernet_subsys(&tcpv6_net_ops);
2222 if (ret)
2223 goto out_tcpv6_protosw;
2224
2225 ret = mptcpv6_init();
2226 if (ret)
2227 goto out_tcpv6_pernet_subsys;
2228
2229 out:
2230 return ret;
2231
2232 out_tcpv6_pernet_subsys:
2233 unregister_pernet_subsys(&tcpv6_net_ops);
2234 out_tcpv6_protosw:
2235 inet6_unregister_protosw(&tcpv6_protosw);
2236 out_tcpv6_protocol:
2237 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2238 goto out;
2239 }
2240
tcpv6_exit(void)2241 void tcpv6_exit(void)
2242 {
2243 unregister_pernet_subsys(&tcpv6_net_ops);
2244 inet6_unregister_protosw(&tcpv6_protosw);
2245 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2246 }
2247