1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr)
85 {
86 return NULL;
87 }
88 #endif
89
90 /* Helper returning the inet6 address from a given tcp socket.
91 * It can be used in TCP stack instead of inet6_sk(sk).
92 * This avoids a dereference and allows compiler optimizations.
93 * It is a specialized version of inet6_sk_generic().
94 */
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
96 {
97 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
98
99 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
100 }
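
/* A minimal layout sketch (editor's note, simplified from struct tcp6_sock)
 * of why the constant offset above works: the IPv6 control block sits at the
 * tail of the combined socket object, right after the TCP state:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock	  tcp;		// must come first
 *		struct ipv6_pinfo inet6;	// reached by the offset math
 *	};
 *
 * so "(u8 *)sk + sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo)"
 * lands exactly on the inet6 member.
 */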
101
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
103 {
104 struct dst_entry *dst = skb_dst(skb);
105
106 if (dst && dst_hold_safe(dst)) {
107 const struct rt6_info *rt = (const struct rt6_info *)dst;
108
109 rcu_assign_pointer(sk->sk_rx_dst, dst);
110 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
111 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
112 }
113 }
114
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
116 {
117 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 ipv6_hdr(skb)->saddr.s6_addr32,
119 tcp_hdr(skb)->dest,
120 tcp_hdr(skb)->source);
121 }
122
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
124 {
125 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 ipv6_hdr(skb)->saddr.s6_addr32);
127 }
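
/* Editor's note: secure_tcpv6_seq() and secure_tcpv6_ts_off() derive the
 * initial sequence number and timestamp offset from the {saddr, daddr,
 * sport, dport} tuple keyed with a boot-time secret (RFC 6528 style), so
 * ISNs are unpredictable to off-path attackers yet stable per tuple.
 */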
128
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
130 int addr_len)
131 {
132 /* This check is replicated from tcp_v6_connect() and intended to
133 * prevent the BPF program called below from accessing bytes that are
134 * out of the bounds specified by the user in addr_len.
135 */
136 if (addr_len < SIN6_LEN_RFC2133)
137 return -EINVAL;
138
139 sock_owned_by_me(sk);
140
141 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
142 }
143
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
145 int addr_len)
146 {
147 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 struct inet_sock *inet = inet_sk(sk);
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 struct in6_addr *saddr = NULL, *final_p, final;
153 struct ipv6_txoptions *opt;
154 struct flowi6 fl6;
155 struct dst_entry *dst;
156 int addr_type;
157 int err;
158 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
159
160 if (addr_len < SIN6_LEN_RFC2133)
161 return -EINVAL;
162
163 if (usin->sin6_family != AF_INET6)
164 return -EAFNOSUPPORT;
165
166 memset(&fl6, 0, sizeof(fl6));
167
168 if (np->sndflow) {
169 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 IP6_ECN_flow_init(fl6.flowlabel);
171 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 struct ip6_flowlabel *flowlabel;
173 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 if (IS_ERR(flowlabel))
175 return -EINVAL;
176 fl6_sock_release(flowlabel);
177 }
178 }
179
180 /*
181 * connect() to INADDR_ANY means loopback (BSD'ism).
182 */
183
184 if (ipv6_addr_any(&usin->sin6_addr)) {
185 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
187 &usin->sin6_addr);
188 else
189 usin->sin6_addr = in6addr_loopback;
190 }
191
192 addr_type = ipv6_addr_type(&usin->sin6_addr);
193
194 if (addr_type & IPV6_ADDR_MULTICAST)
195 return -ENETUNREACH;
196
197 if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 if (addr_len >= sizeof(struct sockaddr_in6) &&
199 usin->sin6_scope_id) {
200 /* If interface is set while binding, indices
201 * must coincide.
202 */
203 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
204 return -EINVAL;
205
206 sk->sk_bound_dev_if = usin->sin6_scope_id;
207 }
208
209 /* Connecting to a link-local address requires an interface */
210 if (!sk->sk_bound_dev_if)
211 return -EINVAL;
212 }
213
214 if (tp->rx_opt.ts_recent_stamp &&
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0;
218 WRITE_ONCE(tp->write_seq, 0);
219 }
220
221 sk->sk_v6_daddr = usin->sin6_addr;
222 np->flow_label = fl6.flowlabel;
223
224 /*
225 * TCP over IPv4
226 */
227
228 if (addr_type & IPV6_ADDR_MAPPED) {
229 u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 struct sockaddr_in sin;
231
232 if (__ipv6_only_sock(sk))
233 return -ENETUNREACH;
234
235 sin.sin_family = AF_INET;
236 sin.sin_port = usin->sin6_port;
237 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
238
239 icsk->icsk_af_ops = &ipv6_mapped;
240 sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
243 #endif
244
245 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
246
247 if (err) {
248 icsk->icsk_ext_hdr_len = exthdrlen;
249 icsk->icsk_af_ops = &ipv6_specific;
250 sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 tp->af_specific = &tcp_sock_ipv6_specific;
253 #endif
254 goto failure;
255 }
256 np->saddr = sk->sk_v6_rcv_saddr;
257
258 return err;
259 }
260
261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 saddr = &sk->sk_v6_rcv_saddr;
263
264 fl6.flowi6_proto = IPPROTO_TCP;
265 fl6.daddr = sk->sk_v6_daddr;
266 fl6.saddr = saddr ? *saddr : np->saddr;
267 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
268 fl6.flowi6_oif = sk->sk_bound_dev_if;
269 fl6.flowi6_mark = sk->sk_mark;
270 fl6.fl6_dport = usin->sin6_port;
271 fl6.fl6_sport = inet->inet_sport;
272 fl6.flowi6_uid = sk->sk_uid;
273
274 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
275 final_p = fl6_update_dst(&fl6, opt, &final);
276
277 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
278
279 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
280 if (IS_ERR(dst)) {
281 err = PTR_ERR(dst);
282 goto failure;
283 }
284
285 if (!saddr) {
286 saddr = &fl6.saddr;
287 sk->sk_v6_rcv_saddr = *saddr;
288 }
289
290 /* set the source address */
291 np->saddr = *saddr;
292 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
293
294 sk->sk_gso_type = SKB_GSO_TCPV6;
295 ip6_dst_store(sk, dst, NULL, NULL);
296
297 icsk->icsk_ext_hdr_len = 0;
298 if (opt)
299 icsk->icsk_ext_hdr_len = opt->opt_flen +
300 opt->opt_nflen;
301
302 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
303
304 inet->inet_dport = usin->sin6_port;
305
306 tcp_set_state(sk, TCP_SYN_SENT);
307 err = inet6_hash_connect(tcp_death_row, sk);
308 if (err)
309 goto late_failure;
310
311 sk_set_txhash(sk);
312
313 if (likely(!tp->repair)) {
314 if (!tp->write_seq)
315 WRITE_ONCE(tp->write_seq,
316 secure_tcpv6_seq(np->saddr.s6_addr32,
317 sk->sk_v6_daddr.s6_addr32,
318 inet->inet_sport,
319 inet->inet_dport));
320 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
321 np->saddr.s6_addr32,
322 sk->sk_v6_daddr.s6_addr32);
323 }
324
325 if (tcp_fastopen_defer_connect(sk, &err))
326 return err;
327 if (err)
328 goto late_failure;
329
330 err = tcp_connect(sk);
331 if (err)
332 goto late_failure;
333
334 return 0;
335
336 late_failure:
337 tcp_set_state(sk, TCP_CLOSE);
338 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
339 inet_reset_saddr(sk);
340 failure:
341 inet->inet_dport = 0;
342 sk->sk_route_caps = 0;
343 return err;
344 }
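
/* Userspace view (a hedged sketch, not part of this file): both the native
 * IPv6 path and the IPV6_ADDR_MAPPED fallback above are reached through a
 * plain connect(2) on an AF_INET6 socket, e.g.:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port   = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * The v4-mapped destination makes addr_type contain IPV6_ADDR_MAPPED, so
 * the call is handed off to tcp_v4_connect() unless IPV6_V6ONLY is set.
 */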
345
346 static void tcp_v6_mtu_reduced(struct sock *sk)
347 {
348 struct dst_entry *dst;
349 u32 mtu;
350
351 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
352 return;
353
354 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
355
356 /* Drop requests trying to increase our current mss.
357 * Check done in __ip6_rt_update_pmtu() is too late.
358 */
359 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
360 return;
361
362 dst = inet6_csk_update_pmtu(sk, mtu);
363 if (!dst)
364 return;
365
366 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
367 tcp_sync_mss(sk, dst_mtu(dst));
368 tcp_simple_retransmit(sk);
369 }
370 }
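
/* Worked example (editor's note): an ICMPV6_PKT_TOOBIG advertising
 * mtu = 1400 yields an mss of roughly 1400 - 40 (IPv6 header) - 20 (TCP
 * header) = 1340, minus any extension-header and TCP-option space; the
 * update above proceeds only if that is smaller than the current mss_cache.
 */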
371
372 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
373 u8 type, u8 code, int offset, __be32 info)
374 {
375 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
376 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
377 struct net *net = dev_net(skb->dev);
378 struct request_sock *fastopen;
379 struct ipv6_pinfo *np;
380 struct tcp_sock *tp;
381 __u32 seq, snd_una;
382 struct sock *sk;
383 bool fatal;
384 int err;
385
386 sk = __inet6_lookup_established(net, &tcp_hashinfo,
387 &hdr->daddr, th->dest,
388 &hdr->saddr, ntohs(th->source),
389 skb->dev->ifindex, inet6_sdif(skb));
390
391 if (!sk) {
392 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
393 ICMP6_MIB_INERRORS);
394 return -ENOENT;
395 }
396
397 if (sk->sk_state == TCP_TIME_WAIT) {
398 inet_twsk_put(inet_twsk(sk));
399 return 0;
400 }
401 seq = ntohl(th->seq);
402 fatal = icmpv6_err_convert(type, code, &err);
403 if (sk->sk_state == TCP_NEW_SYN_RECV) {
404 tcp_req_err(sk, seq, fatal);
405 return 0;
406 }
407
408 bh_lock_sock(sk);
409 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
410 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
411
412 if (sk->sk_state == TCP_CLOSE)
413 goto out;
414
415 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
416 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
417 goto out;
418 }
419
420 tp = tcp_sk(sk);
421 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
422 fastopen = rcu_dereference(tp->fastopen_rsk);
423 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
424 if (sk->sk_state != TCP_LISTEN &&
425 !between(seq, snd_una, tp->snd_nxt)) {
426 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
427 goto out;
428 }
429
430 np = tcp_inet6_sk(sk);
431
432 if (type == NDISC_REDIRECT) {
433 if (!sock_owned_by_user(sk)) {
434 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
435
436 if (dst)
437 dst->ops->redirect(dst, sk, skb);
438 }
439 goto out;
440 }
441
442 if (type == ICMPV6_PKT_TOOBIG) {
443 u32 mtu = ntohl(info);
444
445 /* We are not interested in TCP_LISTEN and open_requests
446 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
447 * they should go through unfragmented).
448 */
449 if (sk->sk_state == TCP_LISTEN)
450 goto out;
451
452 if (!ip6_sk_accept_pmtu(sk))
453 goto out;
454
455 if (mtu < IPV6_MIN_MTU)
456 goto out;
457
458 WRITE_ONCE(tp->mtu_info, mtu);
459
460 if (!sock_owned_by_user(sk))
461 tcp_v6_mtu_reduced(sk);
462 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
463 &sk->sk_tsq_flags))
464 sock_hold(sk);
465 goto out;
466 }
467
468
469 /* Might be for a request_sock */
470 switch (sk->sk_state) {
471 case TCP_SYN_SENT:
472 case TCP_SYN_RECV:
473 /* Only in fast or simultaneous open. If a fast open socket is
474 * already accepted, it is treated as a connected one below.
475 */
476 if (fastopen && !fastopen->sk)
477 break;
478
479 if (!sock_owned_by_user(sk)) {
480 sk->sk_err = err;
481 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
482
483 tcp_done(sk);
484 } else
485 sk->sk_err_soft = err;
486 goto out;
487 }
488
489 if (!sock_owned_by_user(sk) && np->recverr) {
490 sk->sk_err = err;
491 sk->sk_error_report(sk);
492 } else
493 sk->sk_err_soft = err;
494
495 out:
496 bh_unlock_sock(sk);
497 sock_put(sk);
498 return 0;
499 }
500
501
502 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
503 struct flowi *fl,
504 struct request_sock *req,
505 struct tcp_fastopen_cookie *foc,
506 enum tcp_synack_type synack_type)
507 {
508 struct inet_request_sock *ireq = inet_rsk(req);
509 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
510 struct ipv6_txoptions *opt;
511 struct flowi6 *fl6 = &fl->u.ip6;
512 struct sk_buff *skb;
513 int err = -ENOMEM;
514
515 /* First, grab a route. */
516 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
517 IPPROTO_TCP)) == NULL)
518 goto done;
519
520 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
521
522 if (skb) {
523 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
524 &ireq->ir_v6_rmt_addr);
525
526 fl6->daddr = ireq->ir_v6_rmt_addr;
527 if (np->repflow && ireq->pktopts)
528 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
529
530 rcu_read_lock();
531 opt = ireq->ipv6_opt;
532 if (!opt)
533 opt = rcu_dereference(np->opt);
534 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
535 np->tclass, sk->sk_priority);
536 rcu_read_unlock();
537 err = net_xmit_eval(err);
538 }
539
540 done:
541 return err;
542 }
543
544
545 static void tcp_v6_reqsk_destructor(struct request_sock *req)
546 {
547 kfree(inet_rsk(req)->ipv6_opt);
548 kfree_skb(inet_rsk(req)->pktopts);
549 }
550
551 #ifdef CONFIG_TCP_MD5SIG
552 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
553 const struct in6_addr *addr)
554 {
555 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
556 }
557
558 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
559 const struct sock *addr_sk)
560 {
561 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
562 }
563
564 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
565 char __user *optval, int optlen)
566 {
567 struct tcp_md5sig cmd;
568 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
569 u8 prefixlen;
570
571 if (optlen < sizeof(cmd))
572 return -EINVAL;
573
574 if (copy_from_user(&cmd, optval, sizeof(cmd)))
575 return -EFAULT;
576
577 if (sin6->sin6_family != AF_INET6)
578 return -EINVAL;
579
580 if (optname == TCP_MD5SIG_EXT &&
581 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
582 prefixlen = cmd.tcpm_prefixlen;
583 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
584 prefixlen > 32))
585 return -EINVAL;
586 } else {
587 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
588 }
589
590 if (!cmd.tcpm_keylen) {
591 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
592 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
593 AF_INET, prefixlen);
594 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
595 AF_INET6, prefixlen);
596 }
597
598 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
599 return -EINVAL;
600
601 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
602 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
603 AF_INET, prefixlen, cmd.tcpm_key,
604 cmd.tcpm_keylen, GFP_KERNEL);
605
606 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
607 AF_INET6, prefixlen, cmd.tcpm_key,
608 cmd.tcpm_keylen, GFP_KERNEL);
609 }
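
/* Configuration sketch (hedged, illustrative values only): the parser above
 * services the TCP_MD5SIG/TCP_MD5SIG_EXT socket options, which userspace
 * would drive roughly like this:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 16 };
 *	memcpy(md5.tcpm_key, "example-secret!!", 16);
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key; TCP_MD5SIG_EXT additionally honours
 * tcpm_prefixlen when TCP_MD5SIG_FLAG_PREFIX is set in tcpm_flags.
 */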
610
611 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
612 const struct in6_addr *daddr,
613 const struct in6_addr *saddr,
614 const struct tcphdr *th, int nbytes)
615 {
616 struct tcp6_pseudohdr *bp;
617 struct scatterlist sg;
618 struct tcphdr *_th;
619
620 bp = hp->scratch;
621 /* 1. TCP pseudo-header (RFC2460) */
622 bp->saddr = *saddr;
623 bp->daddr = *daddr;
624 bp->protocol = cpu_to_be32(IPPROTO_TCP);
625 bp->len = cpu_to_be32(nbytes);
626
627 _th = (struct tcphdr *)(bp + 1);
628 memcpy(_th, th, sizeof(*th));
629 _th->check = 0;
630
631 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
632 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
633 sizeof(*bp) + sizeof(*th));
634 return crypto_ahash_update(hp->md5_req);
635 }
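
/* The block hashed above mirrors the IPv6 upper-layer checksum
 * pseudo-header (RFC 2460/8200), roughly:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		// TCP header + payload length
 *		__be32		protocol;	// IPPROTO_TCP
 *	};
 *
 * followed by a copy of the TCP header with its checksum field zeroed,
 * as RFC 2385 requires for the MD5 digest.
 */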
636
637 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
638 const struct in6_addr *daddr, struct in6_addr *saddr,
639 const struct tcphdr *th)
640 {
641 struct tcp_md5sig_pool *hp;
642 struct ahash_request *req;
643
644 hp = tcp_get_md5sig_pool();
645 if (!hp)
646 goto clear_hash_noput;
647 req = hp->md5_req;
648
649 if (crypto_ahash_init(req))
650 goto clear_hash;
651 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
652 goto clear_hash;
653 if (tcp_md5_hash_key(hp, key))
654 goto clear_hash;
655 ahash_request_set_crypt(req, NULL, md5_hash, 0);
656 if (crypto_ahash_final(req))
657 goto clear_hash;
658
659 tcp_put_md5sig_pool();
660 return 0;
661
662 clear_hash:
663 tcp_put_md5sig_pool();
664 clear_hash_noput:
665 memset(md5_hash, 0, 16);
666 return 1;
667 }
668
669 static int tcp_v6_md5_hash_skb(char *md5_hash,
670 const struct tcp_md5sig_key *key,
671 const struct sock *sk,
672 const struct sk_buff *skb)
673 {
674 const struct in6_addr *saddr, *daddr;
675 struct tcp_md5sig_pool *hp;
676 struct ahash_request *req;
677 const struct tcphdr *th = tcp_hdr(skb);
678
679 if (sk) { /* valid for establish/request sockets */
680 saddr = &sk->sk_v6_rcv_saddr;
681 daddr = &sk->sk_v6_daddr;
682 } else {
683 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
684 saddr = &ip6h->saddr;
685 daddr = &ip6h->daddr;
686 }
687
688 hp = tcp_get_md5sig_pool();
689 if (!hp)
690 goto clear_hash_noput;
691 req = hp->md5_req;
692
693 if (crypto_ahash_init(req))
694 goto clear_hash;
695
696 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
697 goto clear_hash;
698 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
699 goto clear_hash;
700 if (tcp_md5_hash_key(hp, key))
701 goto clear_hash;
702 ahash_request_set_crypt(req, NULL, md5_hash, 0);
703 if (crypto_ahash_final(req))
704 goto clear_hash;
705
706 tcp_put_md5sig_pool();
707 return 0;
708
709 clear_hash:
710 tcp_put_md5sig_pool();
711 clear_hash_noput:
712 memset(md5_hash, 0, 16);
713 return 1;
714 }
715
716 #endif
717
718 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
719 const struct sk_buff *skb)
720 {
721 #ifdef CONFIG_TCP_MD5SIG
722 const __u8 *hash_location = NULL;
723 struct tcp_md5sig_key *hash_expected;
724 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
725 const struct tcphdr *th = tcp_hdr(skb);
726 int genhash;
727 u8 newhash[16];
728
729 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
730 hash_location = tcp_parse_md5sig_option(th);
731
732 /* We've parsed the options - do we have a hash? */
733 if (!hash_expected && !hash_location)
734 return false;
735
736 if (hash_expected && !hash_location) {
737 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
738 return true;
739 }
740
741 if (!hash_expected && hash_location) {
742 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
743 return true;
744 }
745
746 /* check the signature */
747 genhash = tcp_v6_md5_hash_skb(newhash,
748 hash_expected,
749 NULL, skb);
750
751 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
752 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
753 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
754 genhash ? "failed" : "mismatch",
755 &ip6h->saddr, ntohs(th->source),
756 &ip6h->daddr, ntohs(th->dest));
757 return true;
758 }
759 #endif
760 return false;
761 }
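
/* Decision table implemented above (editor's summary):
 *
 *	expected key | option in segment | verdict
 *	-------------+-------------------+----------------------------
 *	no           | no                | accept
 *	yes          | no                | drop (TCPMD5NOTFOUND)
 *	no           | yes               | drop (TCPMD5UNEXPECTED)
 *	yes          | yes               | accept iff digests match
 */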
762
763 static void tcp_v6_init_req(struct request_sock *req,
764 const struct sock *sk_listener,
765 struct sk_buff *skb)
766 {
767 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
768 struct inet_request_sock *ireq = inet_rsk(req);
769 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
770
771 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
772 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
773
774 /* So that link locals have meaning */
775 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
776 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
777 ireq->ir_iif = tcp_v6_iif(skb);
778
779 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
780 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
781 np->rxopt.bits.rxinfo ||
782 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
783 np->rxopt.bits.rxohlim || np->repflow)) {
784 refcount_inc(&skb->users);
785 ireq->pktopts = skb;
786 }
787 }
788
789 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
790 struct flowi *fl,
791 const struct request_sock *req)
792 {
793 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
794 }
795
796 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
797 .family = AF_INET6,
798 .obj_size = sizeof(struct tcp6_request_sock),
799 .rtx_syn_ack = tcp_rtx_synack,
800 .send_ack = tcp_v6_reqsk_send_ack,
801 .destructor = tcp_v6_reqsk_destructor,
802 .send_reset = tcp_v6_send_reset,
803 .syn_ack_timeout = tcp_syn_ack_timeout,
804 };
805
806 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
807 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
808 sizeof(struct ipv6hdr),
809 #ifdef CONFIG_TCP_MD5SIG
810 .req_md5_lookup = tcp_v6_md5_lookup,
811 .calc_md5_hash = tcp_v6_md5_hash_skb,
812 #endif
813 .init_req = tcp_v6_init_req,
814 #ifdef CONFIG_SYN_COOKIES
815 .cookie_init_seq = cookie_v6_init_sequence,
816 #endif
817 .route_req = tcp_v6_route_req,
818 .init_seq = tcp_v6_init_seq,
819 .init_ts_off = tcp_v6_init_ts_off,
820 .send_synack = tcp_v6_send_synack,
821 };
822
823 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
824 u32 ack, u32 win, u32 tsval, u32 tsecr,
825 int oif, struct tcp_md5sig_key *key, int rst,
826 u8 tclass, __be32 label, u32 priority)
827 {
828 const struct tcphdr *th = tcp_hdr(skb);
829 struct tcphdr *t1;
830 struct sk_buff *buff;
831 struct flowi6 fl6;
832 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
833 struct sock *ctl_sk = net->ipv6.tcp_sk;
834 unsigned int tot_len = sizeof(struct tcphdr);
835 struct dst_entry *dst;
836 __be32 *topt;
837 __u32 mark = 0;
838
839 if (tsecr)
840 tot_len += TCPOLEN_TSTAMP_ALIGNED;
841 #ifdef CONFIG_TCP_MD5SIG
842 if (key)
843 tot_len += TCPOLEN_MD5SIG_ALIGNED;
844 #endif
845
846 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
847 GFP_ATOMIC);
848 if (!buff)
849 return;
850
851 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
852
853 t1 = skb_push(buff, tot_len);
854 skb_reset_transport_header(buff);
855
856 /* Swap the send and the receive. */
857 memset(t1, 0, sizeof(*t1));
858 t1->dest = th->source;
859 t1->source = th->dest;
860 t1->doff = tot_len / 4;
861 t1->seq = htonl(seq);
862 t1->ack_seq = htonl(ack);
863 t1->ack = !rst || !th->ack;
864 t1->rst = rst;
865 t1->window = htons(win);
866
867 topt = (__be32 *)(t1 + 1);
868
869 if (tsecr) {
870 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
871 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
872 *topt++ = htonl(tsval);
873 *topt++ = htonl(tsecr);
874 }
875
876 #ifdef CONFIG_TCP_MD5SIG
877 if (key) {
878 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
879 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
880 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
881 &ipv6_hdr(skb)->saddr,
882 &ipv6_hdr(skb)->daddr, t1);
883 }
884 #endif
885
886 memset(&fl6, 0, sizeof(fl6));
887 fl6.daddr = ipv6_hdr(skb)->saddr;
888 fl6.saddr = ipv6_hdr(skb)->daddr;
889 fl6.flowlabel = label;
890
891 buff->ip_summed = CHECKSUM_PARTIAL;
892 buff->csum = 0;
893
894 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
895
896 fl6.flowi6_proto = IPPROTO_TCP;
897 if (rt6_need_strict(&fl6.daddr) && !oif)
898 fl6.flowi6_oif = tcp_v6_iif(skb);
899 else {
900 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
901 oif = skb->skb_iif;
902
903 fl6.flowi6_oif = oif;
904 }
905
906 if (sk) {
907 if (sk->sk_state == TCP_TIME_WAIT) {
908 mark = inet_twsk(sk)->tw_mark;
909 /* autoflowlabel relies on buff->hash */
910 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
911 PKT_HASH_TYPE_L4);
912 } else {
913 mark = sk->sk_mark;
914 }
915 buff->tstamp = tcp_transmit_time(sk);
916 }
917 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
918 fl6.fl6_dport = t1->dest;
919 fl6.fl6_sport = t1->source;
920 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
921 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
922
923 /* Pass the control socket to ip6_dst_lookup_flow even when the reply
924 * is a RST; the underlying function uses it to retrieve the network
925 * namespace.
926 */
927 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
928 if (!IS_ERR(dst)) {
929 skb_dst_set(buff, dst);
930 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
931 priority);
932 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
933 if (rst)
934 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
935 return;
936 }
937
938 kfree_skb(buff);
939 }
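
/* Option layout produced above (editor's sketch): each optional block is
 * 32-bit aligned with two leading NOPs, matching the doff arithmetic:
 *
 *	NOP NOP TIMESTAMP(kind 8, len 10) tsval(4 bytes) tsecr(4 bytes)
 *	NOP NOP MD5SIG(kind 19, len 18)   digest(16 bytes)
 *
 * which is why tot_len grows by TCPOLEN_TSTAMP_ALIGNED (12) and
 * TCPOLEN_MD5SIG_ALIGNED (20) respectively.
 */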
940
941 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
942 {
943 const struct tcphdr *th = tcp_hdr(skb);
944 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
945 u32 seq = 0, ack_seq = 0;
946 struct tcp_md5sig_key *key = NULL;
947 #ifdef CONFIG_TCP_MD5SIG
948 const __u8 *hash_location = NULL;
949 unsigned char newhash[16];
950 int genhash;
951 struct sock *sk1 = NULL;
952 #endif
953 __be32 label = 0;
954 u32 priority = 0;
955 struct net *net;
956 int oif = 0;
957
958 if (th->rst)
959 return;
960
961 /* If sk is not NULL, it means we did a successful lookup and the incoming
962 * route had to be correct. prequeue might have dropped our dst.
963 */
964 if (!sk && !ipv6_unicast_destination(skb))
965 return;
966
967 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
968 #ifdef CONFIG_TCP_MD5SIG
969 rcu_read_lock();
970 hash_location = tcp_parse_md5sig_option(th);
971 if (sk && sk_fullsock(sk)) {
972 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
973 } else if (hash_location) {
974 /*
975 * The active side is lost. Try to find the listening socket through
976 * the source port, and then find the md5 key through that socket.
977 * We do not lose security here:
978 * the incoming packet is checked against the md5 hash of the found key;
979 * no RST is generated if the md5 hash doesn't match.
980 */
981 sk1 = inet6_lookup_listener(net,
982 &tcp_hashinfo, NULL, 0,
983 &ipv6h->saddr,
984 th->source, &ipv6h->daddr,
985 ntohs(th->source),
986 tcp_v6_iif_l3_slave(skb),
987 tcp_v6_sdif(skb));
988 if (!sk1)
989 goto out;
990
991 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
992 if (!key)
993 goto out;
994
995 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
996 if (genhash || memcmp(hash_location, newhash, 16) != 0)
997 goto out;
998 }
999 #endif
1000
1001 if (th->ack)
1002 seq = ntohl(th->ack_seq);
1003 else
1004 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1005 (th->doff << 2);
1006
1007 if (sk) {
1008 oif = sk->sk_bound_dev_if;
1009 if (sk_fullsock(sk)) {
1010 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1011
1012 trace_tcp_send_reset(sk, skb);
1013 if (np->repflow)
1014 label = ip6_flowlabel(ipv6h);
1015 priority = sk->sk_priority;
1016 }
1017 if (sk->sk_state == TCP_TIME_WAIT) {
1018 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1019 priority = inet_twsk(sk)->tw_priority;
1020 }
1021 } else {
1022 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1023 label = ip6_flowlabel(ipv6h);
1024 }
1025
1026 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1027 label, priority);
1028
1029 #ifdef CONFIG_TCP_MD5SIG
1030 out:
1031 rcu_read_unlock();
1032 #endif
1033 }
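
/* The seq/ack choice above follows RFC 793 (now RFC 9293): if the offending
 * segment carried an ACK, the RST is sent with SEQ = SEG.ACK and no ACK bit;
 * otherwise SEQ stays 0 and ACK = SEG.SEQ + SEG.LEN, where SYN and FIN each
 * occupy one sequence number - hence the th->syn + th->fin arithmetic.
 */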
1034
1035 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1036 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1037 struct tcp_md5sig_key *key, u8 tclass,
1038 __be32 label, u32 priority)
1039 {
1040 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1041 tclass, label, priority);
1042 }
1043
1044 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1045 {
1046 struct inet_timewait_sock *tw = inet_twsk(sk);
1047 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1048
1049 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1050 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1051 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1052 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1053 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1054
1055 inet_twsk_put(tw);
1056 }
1057
1058 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1059 struct request_sock *req)
1060 {
1061 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1062 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1063 */
1064 /* RFC 7323 2.3
1065 * The window field (SEG.WND) of every outgoing segment, with the
1066 * exception of <SYN> segments, MUST be right-shifted by
1067 * Rcv.Wind.Shift bits:
1068 */
1069 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1070 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1071 tcp_rsk(req)->rcv_nxt,
1072 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1073 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1074 req->ts_recent, sk->sk_bound_dev_if,
1075 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1076 0, 0, sk->sk_priority);
1077 }
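
/* Worked example of the RFC 7323 rule quoted above (editor's note): with
 * rsk_rcv_wnd = 64240 and rcv_wscale = 7, the advertised window field is
 * 64240 >> 7 = 501, which the peer scales back up to 501 << 7 = 64128.
 */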
1078
1079
1080 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1081 {
1082 #ifdef CONFIG_SYN_COOKIES
1083 const struct tcphdr *th = tcp_hdr(skb);
1084
1085 if (!th->syn)
1086 sk = cookie_v6_check(sk, skb);
1087 #endif
1088 return sk;
1089 }
1090
1091 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1092 struct tcphdr *th, u32 *cookie)
1093 {
1094 u16 mss = 0;
1095 #ifdef CONFIG_SYN_COOKIES
1096 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1097 &tcp_request_sock_ipv6_ops, sk, th);
1098 if (mss) {
1099 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1100 tcp_synq_overflow(sk);
1101 }
1102 #endif
1103 return mss;
1104 }
1105
1106 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1107 {
1108 if (skb->protocol == htons(ETH_P_IP))
1109 return tcp_v4_conn_request(sk, skb);
1110
1111 if (!ipv6_unicast_destination(skb))
1112 goto drop;
1113
1114 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1115 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1116 return 0;
1117 }
1118
1119 return tcp_conn_request(&tcp6_request_sock_ops,
1120 &tcp_request_sock_ipv6_ops, sk, skb);
1121
1122 drop:
1123 tcp_listendrop(sk);
1124 return 0; /* don't send reset */
1125 }
1126
1127 static void tcp_v6_restore_cb(struct sk_buff *skb)
1128 {
1129 /* We need to move header back to the beginning if xfrm6_policy_check()
1130 * and tcp_v6_fill_cb() are going to be called again.
1131 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1132 */
1133 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1134 sizeof(struct inet6_skb_parm));
1135 }
1136
1137 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1138 struct request_sock *req,
1139 struct dst_entry *dst,
1140 struct request_sock *req_unhash,
1141 bool *own_req)
1142 {
1143 struct inet_request_sock *ireq;
1144 struct ipv6_pinfo *newnp;
1145 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1146 struct ipv6_txoptions *opt;
1147 struct inet_sock *newinet;
1148 bool found_dup_sk = false;
1149 struct tcp_sock *newtp;
1150 struct sock *newsk;
1151 #ifdef CONFIG_TCP_MD5SIG
1152 struct tcp_md5sig_key *key;
1153 #endif
1154 struct flowi6 fl6;
1155
1156 if (skb->protocol == htons(ETH_P_IP)) {
1157 /*
1158 * v6 mapped
1159 */
1160
1161 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1162 req_unhash, own_req);
1163
1164 if (!newsk)
1165 return NULL;
1166
1167 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1168
1169 newinet = inet_sk(newsk);
1170 newnp = tcp_inet6_sk(newsk);
1171 newtp = tcp_sk(newsk);
1172
1173 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1174
1175 newnp->saddr = newsk->sk_v6_rcv_saddr;
1176
1177 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1178 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1179 #ifdef CONFIG_TCP_MD5SIG
1180 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1181 #endif
1182
1183 newnp->ipv6_mc_list = NULL;
1184 newnp->ipv6_ac_list = NULL;
1185 newnp->ipv6_fl_list = NULL;
1186 newnp->pktoptions = NULL;
1187 newnp->opt = NULL;
1188 newnp->mcast_oif = inet_iif(skb);
1189 newnp->mcast_hops = ip_hdr(skb)->ttl;
1190 newnp->rcv_flowinfo = 0;
1191 if (np->repflow)
1192 newnp->flow_label = 0;
1193
1194 /*
1195 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1196 * here, tcp_create_openreq_child now does this for us, see the comment in
1197 * that function for the gory details. -acme
1198 */
1199
1200 /* This is a tricky place. Until this moment the IPv4 tcp code
1201 worked with the IPv6 icsk.icsk_af_ops.
1202 Sync it now.
1203 */
1204 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1205
1206 return newsk;
1207 }
1208
1209 ireq = inet_rsk(req);
1210
1211 if (sk_acceptq_is_full(sk))
1212 goto out_overflow;
1213
1214 if (!dst) {
1215 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1216 if (!dst)
1217 goto out;
1218 }
1219
1220 newsk = tcp_create_openreq_child(sk, req, skb);
1221 if (!newsk)
1222 goto out_nonewsk;
1223
1224 /*
1225 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1226 * count here, tcp_create_openreq_child now does this for us, see the
1227 * comment in that function for the gory details. -acme
1228 */
1229
1230 newsk->sk_gso_type = SKB_GSO_TCPV6;
1231 ip6_dst_store(newsk, dst, NULL, NULL);
1232 inet6_sk_rx_dst_set(newsk, skb);
1233
1234 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1235
1236 newtp = tcp_sk(newsk);
1237 newinet = inet_sk(newsk);
1238 newnp = tcp_inet6_sk(newsk);
1239
1240 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1241
1242 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1243 newnp->saddr = ireq->ir_v6_loc_addr;
1244 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1245 newsk->sk_bound_dev_if = ireq->ir_iif;
1246
1247 /* Now IPv6 options...
1248
1249 First: no IPv4 options.
1250 */
1251 newinet->inet_opt = NULL;
1252 newnp->ipv6_mc_list = NULL;
1253 newnp->ipv6_ac_list = NULL;
1254 newnp->ipv6_fl_list = NULL;
1255
1256 /* Clone RX bits */
1257 newnp->rxopt.all = np->rxopt.all;
1258
1259 newnp->pktoptions = NULL;
1260 newnp->opt = NULL;
1261 newnp->mcast_oif = tcp_v6_iif(skb);
1262 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1263 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1264 if (np->repflow)
1265 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1266
1267 /* Clone native IPv6 options from listening socket (if any)
1268
1269 Yes, keeping reference count would be much more clever,
1270 but we do one more thing here: we reattach the optmem
1271 to newsk.
1272 */
1273 opt = ireq->ipv6_opt;
1274 if (!opt)
1275 opt = rcu_dereference(np->opt);
1276 if (opt) {
1277 opt = ipv6_dup_options(newsk, opt);
1278 RCU_INIT_POINTER(newnp->opt, opt);
1279 }
1280 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1281 if (opt)
1282 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1283 opt->opt_flen;
1284
1285 tcp_ca_openreq_child(newsk, dst);
1286
1287 tcp_sync_mss(newsk, dst_mtu(dst));
1288 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1289
1290 tcp_initialize_rcv_mss(newsk);
1291
1292 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1293 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1294
1295 #ifdef CONFIG_TCP_MD5SIG
1296 /* Copy over the MD5 key from the original socket */
1297 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1298 if (key) {
1299 /* We're using one, so create a matching key
1300 * on the newsk structure. If we fail to get
1301 * memory, then we end up not copying the key
1302 * across. Shucks.
1303 */
1304 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1305 AF_INET6, 128, key->key, key->keylen,
1306 sk_gfp_mask(sk, GFP_ATOMIC));
1307 }
1308 #endif
1309
1310 if (__inet_inherit_port(sk, newsk) < 0) {
1311 inet_csk_prepare_forced_close(newsk);
1312 tcp_done(newsk);
1313 goto out;
1314 }
1315 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1316 &found_dup_sk);
1317 if (*own_req) {
1318 tcp_move_syn(newtp, req);
1319
1320 /* Clone pktoptions received with SYN, if we own the req */
1321 if (ireq->pktopts) {
1322 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1323 consume_skb(ireq->pktopts);
1324 ireq->pktopts = NULL;
1325 if (newnp->pktoptions)
1326 tcp_v6_restore_cb(newnp->pktoptions);
1327 }
1328 } else {
1329 if (!req_unhash && found_dup_sk) {
1330 /* This code path should be executed only in the
1331 * syncookie case.
1332 */
1333 bh_unlock_sock(newsk);
1334 sock_put(newsk);
1335 newsk = NULL;
1336 }
1337 }
1338
1339 return newsk;
1340
1341 out_overflow:
1342 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1343 out_nonewsk:
1344 dst_release(dst);
1345 out:
1346 tcp_listendrop(sk);
1347 return NULL;
1348 }
1349
1350 /* The socket must have its spinlock held when we get
1351 * here, unless it is a TCP_LISTEN socket.
1352 *
1353 * We have a potential double-lock case here, so even when
1354 * doing backlog processing we use the BH locking scheme.
1355 * This is because we cannot sleep with the original spinlock
1356 * held.
1357 */
1358 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1359 {
1360 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1361 struct sk_buff *opt_skb = NULL;
1362 struct tcp_sock *tp;
1363
1364 /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1365 goes to the IPv4 receive handler and is backlogged.
1366 From the backlog it always goes here. Kerboom...
1367 Fortunately, tcp_rcv_established and rcv_established
1368 handle them correctly, but it is not the case with
1369 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1370 */
1371
1372 if (skb->protocol == htons(ETH_P_IP))
1373 return tcp_v4_do_rcv(sk, skb);
1374
1375 /*
1376 * socket locking is here for SMP purposes as backlog rcv
1377 * is currently called with bh processing disabled.
1378 */
1379
1380 /* Do Stevens' IPV6_PKTOPTIONS.
1381
1382 Yes, guys, it is the only place in our code where we
1383 may make it not affect IPv4.
1384 The rest of the code is protocol independent,
1385 and I do not like the idea of uglifying IPv4.
1386
1387 Actually, the whole idea behind IPV6_PKTOPTIONS
1388 looks not very well thought out. For now we latch the
1389 options received in the last packet, enqueued
1390 by tcp. Feel free to propose a better solution.
1391 --ANK (980728)
1392 */
1393 if (np->rxopt.all)
1394 opt_skb = skb_clone_and_charge_r(skb, sk);
1395
1396 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1397 struct dst_entry *dst;
1398
1399 dst = rcu_dereference_protected(sk->sk_rx_dst,
1400 lockdep_sock_is_held(sk));
1401
1402 sock_rps_save_rxhash(sk, skb);
1403 sk_mark_napi_id(sk, skb);
1404 if (dst) {
1405 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1406 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1407 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1408 dst_release(dst);
1409 }
1410 }
1411
1412 tcp_rcv_established(sk, skb);
1413 if (opt_skb)
1414 goto ipv6_pktoptions;
1415 return 0;
1416 }
1417
1418 if (tcp_checksum_complete(skb))
1419 goto csum_err;
1420
1421 if (sk->sk_state == TCP_LISTEN) {
1422 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1423
1424 if (!nsk)
1425 goto discard;
1426
1427 if (nsk != sk) {
1428 if (tcp_child_process(sk, nsk, skb))
1429 goto reset;
1430 if (opt_skb)
1431 __kfree_skb(opt_skb);
1432 return 0;
1433 }
1434 } else
1435 sock_rps_save_rxhash(sk, skb);
1436
1437 if (tcp_rcv_state_process(sk, skb))
1438 goto reset;
1439 if (opt_skb)
1440 goto ipv6_pktoptions;
1441 return 0;
1442
1443 reset:
1444 tcp_v6_send_reset(sk, skb);
1445 discard:
1446 if (opt_skb)
1447 __kfree_skb(opt_skb);
1448 kfree_skb(skb);
1449 return 0;
1450 csum_err:
1451 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1452 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1453 goto discard;
1454
1455
1456 ipv6_pktoptions:
1457 /* You may ask: what is this?
1458
1459 1. skb was enqueued by tcp.
1460 2. skb is added to tail of read queue, rather than out of order.
1461 3. socket is not in passive state.
1462 4. Finally, it really contains options which the user wants to receive.
1463 */
1464 tp = tcp_sk(sk);
1465 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1466 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1467 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1468 np->mcast_oif = tcp_v6_iif(opt_skb);
1469 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1470 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1471 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1472 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1473 if (np->repflow)
1474 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1475 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1476 tcp_v6_restore_cb(opt_skb);
1477 opt_skb = xchg(&np->pktoptions, opt_skb);
1478 } else {
1479 __kfree_skb(opt_skb);
1480 opt_skb = xchg(&np->pktoptions, NULL);
1481 }
1482 }
1483
1484 kfree_skb(opt_skb);
1485 return 0;
1486 }
1487
1488 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1489 const struct tcphdr *th)
1490 {
1491 /* This is tricky: we move IP6CB at its correct location into
1492 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1493 * _decode_session6() uses IP6CB().
1494 * barrier() makes sure compiler won't play aliasing games.
1495 */
1496 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1497 sizeof(struct inet6_skb_parm));
1498 barrier();
1499
1500 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1501 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1502 skb->len - th->doff*4);
1503 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1504 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1505 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1506 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1507 TCP_SKB_CB(skb)->sacked = 0;
1508 TCP_SKB_CB(skb)->has_rxtstamp =
1509 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1510 }
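
/* Sequence-space bookkeeping above, by example (editor's note): a bare SYN
 * with no payload gets end_seq = seq + 1 (the SYN occupies one sequence
 * number), while a pure data segment of 100 bytes with th->doff = 8
 * (a 32-byte header) gets end_seq = seq + 0 + 0 + 132 - 32 = seq + 100.
 */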
1511
1512 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1513 {
1514 struct sk_buff *skb_to_free;
1515 int sdif = inet6_sdif(skb);
1516 const struct tcphdr *th;
1517 const struct ipv6hdr *hdr;
1518 bool refcounted;
1519 struct sock *sk;
1520 int ret;
1521 struct net *net = dev_net(skb->dev);
1522
1523 if (skb->pkt_type != PACKET_HOST)
1524 goto discard_it;
1525
1526 /*
1527 * Count it even if it's bad.
1528 */
1529 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1530
1531 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1532 goto discard_it;
1533
1534 th = (const struct tcphdr *)skb->data;
1535
1536 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1537 goto bad_packet;
1538 if (!pskb_may_pull(skb, th->doff*4))
1539 goto discard_it;
1540
1541 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1542 goto csum_error;
1543
1544 th = (const struct tcphdr *)skb->data;
1545 hdr = ipv6_hdr(skb);
1546
1547 lookup:
1548 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1549 th->source, th->dest, inet6_iif(skb), sdif,
1550 &refcounted);
1551 if (!sk)
1552 goto no_tcp_socket;
1553
1554 process:
1555 if (sk->sk_state == TCP_TIME_WAIT)
1556 goto do_time_wait;
1557
1558 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1559 struct request_sock *req = inet_reqsk(sk);
1560 bool req_stolen = false;
1561 struct sock *nsk;
1562
1563 sk = req->rsk_listener;
1564 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1565 sk_drops_add(sk, skb);
1566 reqsk_put(req);
1567 goto discard_it;
1568 }
1569 if (tcp_checksum_complete(skb)) {
1570 reqsk_put(req);
1571 goto csum_error;
1572 }
1573 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1574 inet_csk_reqsk_queue_drop_and_put(sk, req);
1575 goto lookup;
1576 }
1577 sock_hold(sk);
1578 refcounted = true;
1579 nsk = NULL;
1580 if (!tcp_filter(sk, skb)) {
1581 th = (const struct tcphdr *)skb->data;
1582 hdr = ipv6_hdr(skb);
1583 tcp_v6_fill_cb(skb, hdr, th);
1584 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1585 }
1586 if (!nsk) {
1587 reqsk_put(req);
1588 if (req_stolen) {
1589 /* Another cpu got exclusive access to req
1590 * and created a full blown socket.
1591 * Try to feed this packet to this socket
1592 * instead of discarding it.
1593 */
1594 tcp_v6_restore_cb(skb);
1595 sock_put(sk);
1596 goto lookup;
1597 }
1598 goto discard_and_relse;
1599 }
1600 if (nsk == sk) {
1601 reqsk_put(req);
1602 tcp_v6_restore_cb(skb);
1603 } else if (tcp_child_process(sk, nsk, skb)) {
1604 tcp_v6_send_reset(nsk, skb);
1605 goto discard_and_relse;
1606 } else {
1607 sock_put(sk);
1608 return 0;
1609 }
1610 }
1611 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1612 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1613 goto discard_and_relse;
1614 }
1615
1616 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1617 goto discard_and_relse;
1618
1619 if (tcp_v6_inbound_md5_hash(sk, skb))
1620 goto discard_and_relse;
1621
1622 if (tcp_filter(sk, skb))
1623 goto discard_and_relse;
1624 th = (const struct tcphdr *)skb->data;
1625 hdr = ipv6_hdr(skb);
1626 tcp_v6_fill_cb(skb, hdr, th);
1627
1628 skb->dev = NULL;
1629
1630 if (sk->sk_state == TCP_LISTEN) {
1631 ret = tcp_v6_do_rcv(sk, skb);
1632 goto put_and_return;
1633 }
1634
1635 sk_incoming_cpu_update(sk);
1636
1637 bh_lock_sock_nested(sk);
1638 tcp_segs_in(tcp_sk(sk), skb);
1639 ret = 0;
1640 if (!sock_owned_by_user(sk)) {
1641 skb_to_free = sk->sk_rx_skb_cache;
1642 sk->sk_rx_skb_cache = NULL;
1643 ret = tcp_v6_do_rcv(sk, skb);
1644 } else {
1645 if (tcp_add_backlog(sk, skb))
1646 goto discard_and_relse;
1647 skb_to_free = NULL;
1648 }
1649 bh_unlock_sock(sk);
1650 if (skb_to_free)
1651 __kfree_skb(skb_to_free);
1652 put_and_return:
1653 if (refcounted)
1654 sock_put(sk);
1655 return ret ? -1 : 0;
1656
1657 no_tcp_socket:
1658 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1659 goto discard_it;
1660
1661 tcp_v6_fill_cb(skb, hdr, th);
1662
1663 if (tcp_checksum_complete(skb)) {
1664 csum_error:
1665 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1666 bad_packet:
1667 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1668 } else {
1669 tcp_v6_send_reset(NULL, skb);
1670 }
1671
1672 discard_it:
1673 kfree_skb(skb);
1674 return 0;
1675
1676 discard_and_relse:
1677 sk_drops_add(sk, skb);
1678 if (refcounted)
1679 sock_put(sk);
1680 goto discard_it;
1681
1682 do_time_wait:
1683 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1684 inet_twsk_put(inet_twsk(sk));
1685 goto discard_it;
1686 }
1687
1688 tcp_v6_fill_cb(skb, hdr, th);
1689
1690 if (tcp_checksum_complete(skb)) {
1691 inet_twsk_put(inet_twsk(sk));
1692 goto csum_error;
1693 }
1694
1695 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1696 case TCP_TW_SYN:
1697 {
1698 struct sock *sk2;
1699
1700 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1701 skb, __tcp_hdrlen(th),
1702 &ipv6_hdr(skb)->saddr, th->source,
1703 &ipv6_hdr(skb)->daddr,
1704 ntohs(th->dest),
1705 tcp_v6_iif_l3_slave(skb),
1706 sdif);
1707 if (sk2) {
1708 struct inet_timewait_sock *tw = inet_twsk(sk);
1709 inet_twsk_deschedule_put(tw);
1710 sk = sk2;
1711 tcp_v6_restore_cb(skb);
1712 refcounted = false;
1713 goto process;
1714 }
1715 }
1716 /* to ACK */
1717 /* fall through */
1718 case TCP_TW_ACK:
1719 tcp_v6_timewait_ack(sk, skb);
1720 break;
1721 case TCP_TW_RST:
1722 tcp_v6_send_reset(sk, skb);
1723 inet_twsk_deschedule_put(inet_twsk(sk));
1724 goto discard_it;
1725 case TCP_TW_SUCCESS:
1726 ;
1727 }
1728 goto discard_it;
1729 }
1730
1731 void tcp_v6_early_demux(struct sk_buff *skb)
1732 {
1733 const struct ipv6hdr *hdr;
1734 const struct tcphdr *th;
1735 struct sock *sk;
1736
1737 if (skb->pkt_type != PACKET_HOST)
1738 return;
1739
1740 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1741 return;
1742
1743 hdr = ipv6_hdr(skb);
1744 th = tcp_hdr(skb);
1745
1746 if (th->doff < sizeof(struct tcphdr) / 4)
1747 return;
1748
1749 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1750 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1751 &hdr->saddr, th->source,
1752 &hdr->daddr, ntohs(th->dest),
1753 inet6_iif(skb), inet6_sdif(skb));
1754 if (sk) {
1755 skb->sk = sk;
1756 skb->destructor = sock_edemux;
1757 if (sk_fullsock(sk)) {
1758 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1759
1760 if (dst)
1761 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1762 if (dst &&
1763 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1764 skb_dst_set_noref(skb, dst);
1765 }
1766 }
1767 }
1768
1769 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1770 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1771 .twsk_unique = tcp_twsk_unique,
1772 .twsk_destructor = tcp_twsk_destructor,
1773 };
1774
1775 static const struct inet_connection_sock_af_ops ipv6_specific = {
1776 .queue_xmit = inet6_csk_xmit,
1777 .send_check = tcp_v6_send_check,
1778 .rebuild_header = inet6_sk_rebuild_header,
1779 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1780 .conn_request = tcp_v6_conn_request,
1781 .syn_recv_sock = tcp_v6_syn_recv_sock,
1782 .net_header_len = sizeof(struct ipv6hdr),
1783 .net_frag_header_len = sizeof(struct frag_hdr),
1784 .setsockopt = ipv6_setsockopt,
1785 .getsockopt = ipv6_getsockopt,
1786 .addr2sockaddr = inet6_csk_addr2sockaddr,
1787 .sockaddr_len = sizeof(struct sockaddr_in6),
1788 #ifdef CONFIG_COMPAT
1789 .compat_setsockopt = compat_ipv6_setsockopt,
1790 .compat_getsockopt = compat_ipv6_getsockopt,
1791 #endif
1792 .mtu_reduced = tcp_v6_mtu_reduced,
1793 };
1794
1795 #ifdef CONFIG_TCP_MD5SIG
1796 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1797 .md5_lookup = tcp_v6_md5_lookup,
1798 .calc_md5_hash = tcp_v6_md5_hash_skb,
1799 .md5_parse = tcp_v6_parse_md5_keys,
1800 };
1801 #endif
1802
1803 /*
1804 * TCP over IPv4 via INET6 API
1805 */
1806 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1807 .queue_xmit = ip_queue_xmit,
1808 .send_check = tcp_v4_send_check,
1809 .rebuild_header = inet_sk_rebuild_header,
1810 .sk_rx_dst_set = inet_sk_rx_dst_set,
1811 .conn_request = tcp_v6_conn_request,
1812 .syn_recv_sock = tcp_v6_syn_recv_sock,
1813 .net_header_len = sizeof(struct iphdr),
1814 .setsockopt = ipv6_setsockopt,
1815 .getsockopt = ipv6_getsockopt,
1816 .addr2sockaddr = inet6_csk_addr2sockaddr,
1817 .sockaddr_len = sizeof(struct sockaddr_in6),
1818 #ifdef CONFIG_COMPAT
1819 .compat_setsockopt = compat_ipv6_setsockopt,
1820 .compat_getsockopt = compat_ipv6_getsockopt,
1821 #endif
1822 .mtu_reduced = tcp_v4_mtu_reduced,
1823 };
1824
1825 #ifdef CONFIG_TCP_MD5SIG
1826 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1827 .md5_lookup = tcp_v4_md5_lookup,
1828 .calc_md5_hash = tcp_v4_md5_hash_skb,
1829 .md5_parse = tcp_v6_parse_md5_keys,
1830 };
1831 #endif
1832
1833 /* NOTE: A lot of things are set to zero explicitly by the call to
1834 * sk_alloc(), so they need not be done here.
1835 */
1836 static int tcp_v6_init_sock(struct sock *sk)
1837 {
1838 struct inet_connection_sock *icsk = inet_csk(sk);
1839
1840 tcp_init_sock(sk);
1841
1842 icsk->icsk_af_ops = &ipv6_specific;
1843
1844 #ifdef CONFIG_TCP_MD5SIG
1845 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1846 #endif
1847
1848 return 0;
1849 }
1850
1851 #ifdef CONFIG_PROC_FS
1852 /* Proc filesystem TCPv6 sock list dumping. */
1853 static void get_openreq6(struct seq_file *seq,
1854 const struct request_sock *req, int i)
1855 {
1856 long ttd = req->rsk_timer.expires - jiffies;
1857 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1858 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1859
1860 if (ttd < 0)
1861 ttd = 0;
1862
1863 seq_printf(seq,
1864 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1865 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1866 i,
1867 src->s6_addr32[0], src->s6_addr32[1],
1868 src->s6_addr32[2], src->s6_addr32[3],
1869 inet_rsk(req)->ir_num,
1870 dest->s6_addr32[0], dest->s6_addr32[1],
1871 dest->s6_addr32[2], dest->s6_addr32[3],
1872 ntohs(inet_rsk(req)->ir_rmt_port),
1873 TCP_SYN_RECV,
1874 0, 0, /* could print option size, but that is af dependent. */
1875 1, /* timers active (only the expire timer) */
1876 jiffies_to_clock_t(ttd),
1877 req->num_timeout,
1878 from_kuid_munged(seq_user_ns(seq),
1879 sock_i_uid(req->rsk_listener)),
1880 0, /* non standard timer */
1881 0, /* open_requests have no inode */
1882 0, req);
1883 }
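
/* Illustrative /proc/net/tcp6 request row emitted above (hedged, made-up
 * values):
 *
 *	0: 00000000000000000000000001000000:1F90 00000000000000000000000001000000:D2F0 03 ...
 *
 * i.e. addresses as four raw %08X words, ports in hex (1F90 = 8080), then
 * the TCP_SYN_RECV state byte, queue/timer fields, uid and the req pointer.
 */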
1884
1885 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1886 {
1887 const struct in6_addr *dest, *src;
1888 __u16 destp, srcp;
1889 int timer_active;
1890 unsigned long timer_expires;
1891 const struct inet_sock *inet = inet_sk(sp);
1892 const struct tcp_sock *tp = tcp_sk(sp);
1893 const struct inet_connection_sock *icsk = inet_csk(sp);
1894 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1895 int rx_queue;
1896 int state;
1897
1898 dest = &sp->sk_v6_daddr;
1899 src = &sp->sk_v6_rcv_saddr;
1900 destp = ntohs(inet->inet_dport);
1901 srcp = ntohs(inet->inet_sport);
1902
1903 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1904 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1905 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1906 timer_active = 1;
1907 timer_expires = icsk->icsk_timeout;
1908 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1909 timer_active = 4;
1910 timer_expires = icsk->icsk_timeout;
1911 } else if (timer_pending(&sp->sk_timer)) {
1912 timer_active = 2;
1913 timer_expires = sp->sk_timer.expires;
1914 } else {
1915 timer_active = 0;
1916 timer_expires = jiffies;
1917 }
1918
1919 state = inet_sk_state_load(sp);
1920 if (state == TCP_LISTEN)
1921 rx_queue = sp->sk_ack_backlog;
1922 else
1923 /* Because we don't lock the socket,
1924 * we might find a transient negative value.
1925 */
1926 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1927 READ_ONCE(tp->copied_seq), 0);
1928
1929 seq_printf(seq,
1930 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1931 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1932 i,
1933 src->s6_addr32[0], src->s6_addr32[1],
1934 src->s6_addr32[2], src->s6_addr32[3], srcp,
1935 dest->s6_addr32[0], dest->s6_addr32[1],
1936 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1937 state,
1938 READ_ONCE(tp->write_seq) - tp->snd_una,
1939 rx_queue,
1940 timer_active,
1941 jiffies_delta_to_clock_t(timer_expires - jiffies),
1942 icsk->icsk_retransmits,
1943 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1944 icsk->icsk_probes_out,
1945 sock_i_ino(sp),
1946 refcount_read(&sp->sk_refcnt), sp,
1947 jiffies_to_clock_t(icsk->icsk_rto),
1948 jiffies_to_clock_t(icsk->icsk_ack.ato),
1949 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1950 tp->snd_cwnd,
1951 state == TCP_LISTEN ?
1952 fastopenq->max_qlen :
1953 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1954 );
1955 }
1956
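/*
 * Full sockets append five columns beyond the shared set: icsk_rto and
 * icsk_ack.ato (both converted to clock_t), a bitfield combining the
 * quick-ack counter with pingpong mode, snd_cwnd, and finally either
 * the listener's fastopen max_qlen or ssthresh (-1 while the flow is
 * still in its initial slow start).
 */
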
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest  = &tw->tw_v6_daddr;
	src   = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

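/*
 * A hypothetical /proc/net/tcp6 line for a socket listening on port
 * 22 (0x0016); the trailing fields are elided and the values are made
 * up for illustration:
 *
 *   0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A ...
 *
 * 0A is TCP_LISTEN.  Addresses are the four s6_addr32 words printed as
 * host-endian hex (so bytes appear swapped within each word on
 * little-endian machines), while ports go through ntohs() and print in
 * host order.
 */
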
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

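/*
 * tcpv6_prot is bound to the AF_INET6/SOCK_STREAM pair through
 * tcpv6_protosw below, so a plain socket(2) call lands here.  From
 * userspace (illustrative only):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
 *
 * inet6_create() walks the registered protosw entries for a matching
 * type/protocol pair and installs tcpv6_prot as the socket's proto.
 */
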
static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

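/*
 * The pernet hooks above run per network namespace: init creates the
 * namespace's raw control socket (used e.g. for sending resets), exit
 * destroys it, and exit_batch purges any remaining IPv6 timewait
 * sockets for the whole batch of dying namespaces in one pass.
 */
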
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

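/*
 * Teardown mirrors tcpv6_init() in reverse: pernet state first, then
 * the protosw entry, then the L4 protocol handler, so no new sockets
 * or packets can reach state that has already been torn down.
 */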
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}