/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	const struct rt6_info *rt = (const struct rt6_info *)dst;

	dst_hold(dst);
	sk->sk_rx_dst = dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	if (rt->rt6i_node)
		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
}

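/* Hash the socket into the TCP lookup tables.  A socket that went
 * through the v4-mapped path (icsk_af_ops == &ipv6_mapped) lives in the
 * IPv4 hash tables, so defer to tcp_prot.hash() for it.
 */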
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(sk, NULL);
		local_bh_enable();
	}
}

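/* Derive the initial sequence number for a new connection from a keyed
 * hash over the {saddr, daddr, sport, dport} 4-tuple, so that ISNs are
 * hard to predict off-path (see secure_tcpv6_sequence_number()).
 */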
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->dest,
					    tcp_hdr(skb)->source);
}

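/* Active open: validate the destination, resolve flow label and scope,
 * route the flow, pick a source address, hash the socket into the bind
 * tables and finally send the SYN.  Connections to v4-mapped addresses
 * are handed over to tcp_v4_connect() with the af_ops switched.
 */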
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct rt6_info *rt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			usin->sin6_addr = flowlabel->dst;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the
			 * indices must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	np->daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		} else {
			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
					       &np->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = np->daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl6.saddr;
		np->rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
		tcp_fetch_timewait_stamp(sk, dst);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->inet_sport,
							     inet->inet_dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

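/* Reaction to an ICMPv6 Packet Too Big: update the cached path MTU and,
 * if the current MSS no longer fits, shrink it and retransmit what is
 * in flight so the smaller segments can get through.
 */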
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

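/* ICMPv6 error handler: map the embedded TCP header back to a socket,
 * then act on the error type - a redirect, a packet-too-big, or a hard
 * error that may have to kill a connection in SYN_SENT/SYN_RECV.
 */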
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			  th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
				   ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst)
			dst->ops->redirect(dst, sk, skb);
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &tp->tsq_flags))
			sock_hold(sk);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

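/* Build and transmit a SYN-ACK for a pending connection request.  The
 * route is taken from @dst when the caller already holds one, otherwise
 * it is looked up from the request.
 */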
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi6 *fl6,
			      struct request_sock *req,
			      u16 queue_mapping)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, NULL);

	if (skb) {
		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

		fl6->daddr = treq->rmt_addr;
		skb_set_queue_mapping(skb, queue_mapping);
		rcu_read_lock();
		err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
			       np->tclass);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
{
	struct flowi6 fl6;
	int res;

	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
	if (!res)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return res;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree_skb(inet6_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
}

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct request_sock *req,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) {
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
	} else if (req) {
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return 1;
	}
	return 0;
}
#endif

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family = AF_INET6,
	.obj_size = sizeof(struct tcp6_request_sock),
	.rtx_syn_ack = tcp_v6_rtx_synack,
	.send_ack = tcp_v6_reqsk_send_ack,
	.destructor = tcp_v6_reqsk_destructor,
	.send_reset = tcp_v6_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup = tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash = tcp_v6_md5_hash_skb,
};
#endif

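/* Common helper for stateless replies (RSTs and timewait/reqsk ACKs):
 * build a bare TCP header with optional timestamp and MD5 options,
 * reverse the addresses from the incoming skb, and send it on a route
 * looked up through the per-netns control socket.
 */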
static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 struct tcp_md5sig_key *key, int rst, u8 tclass)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
		fl6.flowi6_oif = inet6_iif(skb);
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for an RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost.  Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket.  We do not lose security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key, and no RST is generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), inet6_iif(skb));
		if (!sk1)
			return;

		rcu_read_lock();
		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto release_sk1;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, key, 1, 0);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb,
			    u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr,
			    struct tcp_md5sig_key *key, u8 tclass)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, key, 0,
			     tclass);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v6_send_ack(sk, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
			req->rcv_wnd, tcp_time_stamp, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
}

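/* For a segment landing on a listening socket, work out what it belongs
 * to: a pending request in the SYN queue, an already-established child
 * in the ehash, or (with syncookies) an ACK whose cookie must be checked.
 */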
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
					 &ipv6_hdr(skb)->saddr, th->source,
					 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
					 inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
	struct flowi6 fl6;
	bool want_cookie = false;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
		if (!want_cookie)
			goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, NULL);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	treq->rmt_addr = ipv6_hdr(skb)->saddr;
	treq->loc_addr = ipv6_hdr(skb)->daddr;
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	treq->iif = sk->sk_bound_dev_if;
	inet_rsk(req)->ir_mark = inet_request_mark(sk, skb);

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (!isn) {
		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		}

		if (want_cookie) {
			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
			req->cookie_ts = tmp_opt.tstamp_ok;
			goto have_isn;
		}

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive.  It means that we continue to
			 * communicate with destinations already
			 * remembered at the moment of the synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v6_init_sequence(skb);
	}
have_isn:
	tcp_rsk(req)->snt_isn = isn;

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_release;

	if (tcp_v6_send_synack(sk, dst, &fl6, req,
			       skb_get_queue_mapping(skb)) ||
	    want_cookie)
		goto drop_and_free;

	tcp_rsk(req)->snt_synack = tcp_time_stamp;
	tcp_rsk(req)->listener = NULL;
	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0; /* don't send reset */
}

static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		newnp->rcv_saddr = newnp->saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	treq = inet6_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newnp->daddr = treq->rmt_addr;
	newnp->saddr = treq->loc_addr;
	newnp->rcv_saddr = treq->loc_addr;
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts,
					      sk_gfp_atomic(sk, GFP_ATOMIC));
		consume_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	tcp_synack_rtt_meas(newsk, req);
	newtp->total_retrans = req->num_retrans;

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newnp->daddr);
	if (key != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
			       AF_INET6, key->key, key->keylen,
			       sk_gfp_atomic(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	__inet6_hash(newsk, NULL);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}

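/* Validate the TCP checksum on receive.  Hardware-verified sums
 * (CHECKSUM_COMPLETE) are folded against the pseudo-header; short
 * packets are checked in full right away, while longer ones keep the
 * pseudo-header sum in skb->csum for the later copy/checksum paths.
 */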
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like the idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto reset;
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxtclass)
			np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

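/* Main receive entry point, called from the IPv6 protocol handler in
 * softirq context: validate the header and checksum, look the segment
 * up in the established/listening hash tables, and hand it to the
 * socket directly, via the prequeue, or via the backlog depending on
 * who currently owns the socket.
 */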
static int tcp_v6_rcv(struct sk_buff *skb)
{
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
		goto csum_error;

	th = tcp_hdr(skb);
	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2)) {
		inet_twsk_put(inet_twsk(sk));
		goto bad_packet;
	}
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

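/* Early demux: before routing, try to match the packet to an
 * established socket and attach its cached rx dst, saving a full
 * route lookup on the hot path.
 */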
static void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
	.twsk_unique = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit = inet6_csk_xmit,
	.send_check = tcp_v6_send_check,
	.rebuild_header = inet6_sk_rebuild_header,
	.sk_rx_dst_set = inet6_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
	.bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup = tcp_v6_md5_lookup,
	.calc_md5_hash = tcp_v6_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */

static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit = ip_queue_xmit,
	.send_check = tcp_v4_send_check,
	.rebuild_header = inet_sk_rebuild_header,
	.sk_rx_dst_set = inet_sk_rx_dst_set,
	.conn_request = tcp_v6_conn_request,
	.syn_recv_sock = tcp_v6_syn_recv_sock,
	.net_header_len = sizeof(struct iphdr),
	.setsockopt = ipv6_setsockopt,
	.getsockopt = ipv6_getsockopt,
	.addr2sockaddr = inet6_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in6),
	.bind_conflict = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_parse = tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq), uid),
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct ipv6_pinfo *np = inet6_sk(sp);

	dest = &np->daddr;
	src = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	long delta = tw->tw_ttd - jiffies;

	dest = &tw6->tw_v6_daddr;
	src = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner = THIS_MODULE,
	.open = tcp_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name = "tcp6",
	.family = AF_INET6,
	.seq_fops = &tcp6_afinfo_seq_fops,
	.seq_ops = {
		.show = tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif

static void tcp_v6_clear_sk(struct sock *sk, int size)
{
	struct inet_sock *inet = inet_sk(sk);

	/* we do not want to clear pinet6 field, because of RCU lookups */
	sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));

	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
	memset(&inet->pinet6 + 1, 0, size);
}

struct proto tcpv6_prot = {
	.name = "TCPv6",
	.owner = THIS_MODULE,
	.close = tcp_close,
	.connect = tcp_v6_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.ioctl = tcp_ioctl,
	.init = tcp_v6_init_sock,
	.destroy = tcp_v6_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.sendpage = tcp_sendpage,
	.backlog_rcv = tcp_v6_do_rcv,
	.release_cb = tcp_release_cb,
	.mtu_reduced = tcp_v6_mtu_reduced,
	.hash = tcp_v6_hash,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.sockets_allocated = &tcp_sockets_allocated,
	.memory_allocated = &tcp_memory_allocated,
	.memory_pressure = &tcp_memory_pressure,
	.orphan_count = &tcp_orphan_count,
	.sysctl_wmem = sysctl_tcp_wmem,
	.sysctl_rmem = sysctl_tcp_rmem,
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp6_sock),
	.slab_flags = SLAB_DESTROY_BY_RCU,
	.twsk_prot = &tcp6_timewait_sock_ops,
	.rsk_prot = &tcp6_request_sock_ops,
	.h.hashinfo = &tcp_hashinfo,
	.no_autobind = true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_tcp_setsockopt,
	.compat_getsockopt = compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.proto_cgroup = tcp_proto_cgroup,
#endif
	.clear_sk = tcp_v6_clear_sk,
	.diag_destroy = tcp_abort,
};

static const struct inet6_protocol tcpv6_protocol = {
	.early_demux = tcp_v6_early_demux,
	.handler = tcp_v6_rcv,
	.err_handler = tcp_v6_err,
	.flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type = SOCK_STREAM,
	.protocol = IPPROTO_TCP,
	.prot = &tcpv6_prot,
	.ops = &inet6_stream_ops,
	.no_check = 0,
	.flags = INET_PROTOSW_PERMANENT |
		 INET_PROTOSW_ICSK,
};

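/* Per-netns setup: each network namespace gets its own kernel control
 * socket (net->ipv6.tcp_sk), used above by tcp_v6_send_response() to
 * emit RSTs and ACKs that do not belong to any user socket.
 */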
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}