1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Based on net/ipv4/tcp.c
4 * Authors: Ross Biro
5 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
6 * Mark Evans, <evansmp@uhura.aston.ac.uk>
7 * Corey Minyard <wf-rch!minyard@relay.EU.net>
8 * Florian La Roche, <flla@stud.uni-sb.de>
9 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
10 * Linus Torvalds, <torvalds@cs.helsinki.fi>
11 * Alan Cox, <gw4pts@gw4pts.ampr.org>
12 * Matthew Dillon, <dillon@apollo.west.oic.com>
13 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
14 * Jorge Cwik, <jorge@laser.satlink.net>
15 *
16 * Fixes:
17 * Alan Cox : Numerous verify_area() calls
18 * Alan Cox : Set the ACK bit on a reset
19 * Alan Cox : Stopped it crashing if it closed while
20 * sk->inuse=1 and was trying to connect
21 * (tcp_err()).
22 * Alan Cox : All icmp error handling was broken
23 * pointers passed where wrong and the
24 * socket was looked up backwards. Nobody
25 * tested any icmp error code obviously.
26 * Alan Cox : tcp_err() now handled properly. It
27 * wakes people on errors. poll
28 * behaves and the icmp error race
29 * has gone by moving it into sock.c
30 * Alan Cox : tcp_send_reset() fixed to work for
31 * everything not just packets for
32 * unknown sockets.
33 * Alan Cox : tcp option processing.
34 * Alan Cox : Reset tweaked (still not 100%) [Had
35 * syn rule wrong]
36 * Herp Rosmanith : More reset fixes
37 * Alan Cox : No longer acks invalid rst frames.
38 * Acking any kind of RST is right out.
39 * Alan Cox : Sets an ignore me flag on an rst
40 * receive otherwise odd bits of prattle
41 * escape still
42 * Alan Cox : Fixed another acking RST frame bug.
43 * Should stop LAN workplace lockups.
44 * Alan Cox : Some tidyups using the new skb list
45 * facilities
46 * Alan Cox : sk->keepopen now seems to work
47 * Alan Cox : Pulls options out correctly on accepts
48 * Alan Cox : Fixed assorted sk->rqueue->next errors
49 * Alan Cox : PSH doesn't end a TCP read. Switched a
50 * bit to skb ops.
51 * Alan Cox : Tidied tcp_data to avoid a potential
52 * nasty.
53 * Alan Cox : Added some better commenting, as the
54 * tcp is hard to follow
55 * Alan Cox : Removed incorrect check for 20 * psh
56 * Michael O'Reilly : ack < copied bug fix.
57 * Johannes Stille : Misc tcp fixes (not all in yet).
58 * Alan Cox : FIN with no memory -> CRASH
59 * Alan Cox : Added socket option proto entries.
60 * Also added awareness of them to accept.
61 * Alan Cox : Added TCP options (SOL_TCP)
62 * Alan Cox : Switched wakeup calls to callbacks,
63 * so the kernel can layer network
64 * sockets.
65 * Alan Cox : Use ip_tos/ip_ttl settings.
66 * Alan Cox : Handle FIN (more) properly (we hope).
67 * Alan Cox : RST frames sent on unsynchronised
68 * state ack error.
69 * Alan Cox : Put in missing check for SYN bit.
70 * Alan Cox : Added tcp_select_window() aka NET2E
71 * window non shrink trick.
72 * Alan Cox : Added a couple of small NET2E timer
73 * fixes
74 * Charles Hedrick : TCP fixes
75 * Toomas Tamm : TCP window fixes
76 * Alan Cox : Small URG fix to rlogin ^C ack fight
77 * Charles Hedrick : Rewrote most of it to actually work
78 * Linus : Rewrote tcp_read() and URG handling
79 * completely
80 * Gerhard Koerting: Fixed some missing timer handling
81 * Matthew Dillon : Reworked TCP machine states as per RFC
82 * Gerhard Koerting: PC/TCP workarounds
83 * Adam Caldwell : Assorted timer/timing errors
84 * Matthew Dillon : Fixed another RST bug
85 * Alan Cox : Move to kernel side addressing changes.
86 * Alan Cox : Beginning work on TCP fastpathing
87 * (not yet usable)
88 * Arnt Gulbrandsen: Turbocharged tcp_check() routine.
89 * Alan Cox : TCP fast path debugging
90 * Alan Cox : Window clamping
91 * Michael Riepe : Bug in tcp_check()
92 * Matt Dillon : More TCP improvements and RST bug fixes
93 * Matt Dillon : Yet more small nasties removed from the
94 * TCP code (Be very nice to this man if
95 * tcp finally works 100%) 8)
96 * Alan Cox : BSD accept semantics.
97 * Alan Cox : Reset on closedown bug.
98 * Peter De Schrijver : ENOTCONN check missing in tcp_sendto().
99 * Michael Pall : Handle poll() after URG properly in
100 * all cases.
101 * Michael Pall : Undo the last fix in tcp_read_urg()
102 * (multi URG PUSH broke rlogin).
103 * Michael Pall : Fix the multi URG PUSH problem in
104 * tcp_readable(), poll() after URG
105 * works now.
106 * Michael Pall : recv(...,MSG_OOB) never blocks in the
107 * BSD api.
108 * Alan Cox : Changed the semantics of sk->socket to
109 * fix a race and a signal problem with
110 * accept() and async I/O.
111 * Alan Cox : Relaxed the rules on tcp_sendto().
112 * Yury Shevchuk : Really fixed accept() blocking problem.
113 * Craig I. Hagan : Allow for BSD compatible TIME_WAIT for
114 * clients/servers which listen in on
115 * fixed ports.
116 * Alan Cox : Cleaned the above up and shrank it to
117 * a sensible code size.
118 * Alan Cox : Self connect lockup fix.
119 * Alan Cox : No connect to multicast.
120 * Ross Biro : Close unaccepted children on master
121 * socket close.
122 * Alan Cox : Reset tracing code.
123 * Alan Cox : Spurious resets on shutdown.
124 * Alan Cox : Giant 15 minute/60 second timer error
125 * Alan Cox : Small whoops in polling before an
126 * accept.
127 * Alan Cox : Kept the state trace facility since
128 * it's handy for debugging.
129 * Alan Cox : More reset handler fixes.
130 * Alan Cox : Started rewriting the code based on
131 * the RFC's for other useful protocol
132 * references see: Comer, KA9Q NOS, and
133 * for a reference on the difference
134 * between specifications and how BSD
135 * works see the 4.4lite source.
136 * A.N.Kuznetsov : Don't time wait on completion of tidy
137 * close.
138 * Linus Torvalds : Fin/Shutdown & copied_seq changes.
139 * Linus Torvalds : Fixed BSD port reuse to work first syn
140 * Alan Cox : Reimplemented timers as per the RFC
141 * and using multiple timers for sanity.
142 * Alan Cox : Small bug fixes, and a lot of new
143 * comments.
144 * Alan Cox : Fixed dual reader crash by locking
145 * the buffers (much like datagram.c)
146 * Alan Cox : Fixed stuck sockets in probe. A probe
147 * now gets fed up of retrying without
148 * (even a no space) answer.
149 * Alan Cox : Extracted closing code better
150 * Alan Cox : Fixed the closing state machine to
151 * resemble the RFC.
152 * Alan Cox : More 'per spec' fixes.
153 * Jorge Cwik : Even faster checksumming.
154 * Alan Cox : tcp_data() doesn't ack illegal PSH
155 * only frames. At least one pc tcp stack
156 * generates them.
157 * Alan Cox : Cache last socket.
158 * Alan Cox : Per route irtt.
159 * Matt Day : poll()->select() match BSD precisely on error
160 * Alan Cox : New buffers
161 * Marc Tamsky : Various sk->prot->retransmits and
162 * sk->retransmits misupdating fixed.
163 * Fixed tcp_write_timeout: stuck close,
164 * and TCP syn retries gets used now.
165 * Mark Yarvis : In tcp_read_wakeup(), don't send an
166 * ack if state is TCP_CLOSED.
167 * Alan Cox : Look up device on a retransmit - routes may
168 * change. Doesn't yet cope with MSS shrink right
169 * but it's a start!
170 * Marc Tamsky : Closing in closing fixes.
171 * Mike Shaver : RFC1122 verifications.
172 * Alan Cox : rcv_saddr errors.
173 * Alan Cox : Block double connect().
174 * Alan Cox : Small hooks for enSKIP.
175 * Alexey Kuznetsov: Path MTU discovery.
176 * Alan Cox : Support soft errors.
177 * Alan Cox : Fix MTU discovery pathological case
178 * when the remote claims no mtu!
179 * Marc Tamsky : TCP_CLOSE fix.
180 * Colin (G3TNE) : Send a reset on syn ack replies in
181 * window but wrong (fixes NT lpd problems)
182 * Pedro Roque : Better TCP window handling, delayed ack.
183 * Joerg Reuter : No modification of locked buffers in
184 * tcp_do_retransmit()
185 * Eric Schenk : Changed receiver side silly window
186 * avoidance algorithm to BSD style
187 * algorithm. This doubles throughput
188 * against machines running Solaris,
189 * and seems to result in general
190 * improvement.
191 * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD
192 * Willy Konynenberg : Transparent proxying support.
193 * Mike McLagan : Routing by source
194 * Keith Owens : Do proper merging with partial SKB's in
195 * tcp_do_sendmsg to avoid burstiness.
196 * Eric Schenk : Fix fast close down bug with
197 * shutdown() followed by close().
198 * Andi Kleen : Make poll agree with SIGIO
199 * Salvatore Sanfilippo : Support SO_LINGER with linger == 1 and
200 * lingertime == 0 (RFC 793 ABORT Call)
201 * Hirokazu Takahashi : Use copy_from_user() instead of
202 * csum_and_copy_from_user() if possible.
203 *
204 * Based on net/ipv4/tcp_ipv4.c
205 * See tcp.c for author information
206 *
207 * Changes:
208 * David S. Miller : New socket lookup architecture.
209 * This code is dedicated to John Dyson.
210 * David S. Miller : Change semantics of established hash,
211 * half is devoted to TIME_WAIT sockets
212 * and the rest go in the other half.
213 * Andi Kleen : Add support for syncookies and fixed
214 * some bugs: ip options weren't passed to
215 * the TCP layer, missed a check for an
216 * ACK bit.
217 * Andi Kleen : Implemented fast path mtu discovery.
218 * Fixed many serious bugs in the
219 * request_sock handling and moved
220 * most of it into the af independent code.
221 * Added tail drop and some other bugfixes.
222 * Added new listen semantics.
223 * Mike McLagan : Routing by source
224 * Juan Jose Ciarlante: ip_dynaddr bits
225 * Andi Kleen: various fixes.
226 * Vitaly E. Lavrov : Transparent proxy revived after year
227 * coma.
228 * Andi Kleen : Fix new listen.
229 * Andi Kleen : Fix accept error reporting.
230 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
231 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
232 * a single port at the same time.
233 *
234 * Based on net/ipv6/tcp_ipv6.c
235 * Authors:
236 * Pedro Roque <roque@di.fc.ul.pt>
237 *
238 * Fixes:
239 * Hideaki YOSHIFUJI : sin6_scope_id support
240 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
241 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
242 * a single port at the same time.
243 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
244 *
245 * Based on net/core/stream.c
246 * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
247 * (from old tcp.c code)
248 * Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
249 *
250 * Based on net/ipv4/tcp_output.c
251 * Authors: Ross Biro
252 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
253 * Mark Evans, <evansmp@uhura.aston.ac.uk>
254 * Corey Minyard <wf-rch!minyard@relay.EU.net>
255 * Florian La Roche, <flla@stud.uni-sb.de>
256 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
257 * Linus Torvalds, <torvalds@cs.helsinki.fi>
258 * Alan Cox, <gw4pts@gw4pts.ampr.org>
259 * Matthew Dillon, <dillon@apollo.west.oic.com>
260 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
261 * Jorge Cwik, <jorge@laser.satlink.net>
262 *
263 * Changes: Pedro Roque : Retransmit queue handled by TCP.
264 * : Fragmentation on mtu decrease
265 * : Segment collapse on retransmit
266 * : AF independence
267 *
268 * Linus Torvalds : send_delayed_ack
269 * David S. Miller : Charge memory using the right skb
270 * during syn/ack processing.
271 * David S. Miller : Output engine completely rewritten.
272 * Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
273 * Cacophonix Gaul : draft-minshall-nagle-01
274 * J Hadi Salim : ECN support
275 *
276 * Based on net/ipv4/tcp_input.c
277 * Authors: Ross Biro
278 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
279 * Mark Evans, <evansmp@uhura.aston.ac.uk>
280 * Corey Minyard <wf-rch!minyard@relay.EU.net>
281 * Florian La Roche, <flla@stud.uni-sb.de>
282 * Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
283 * Linus Torvalds, <torvalds@cs.helsinki.fi>
284 * Alan Cox, <gw4pts@gw4pts.ampr.org>
285 * Matthew Dillon, <dillon@apollo.west.oic.com>
286 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
287 * Jorge Cwik, <jorge@laser.satlink.net>
288 *
289 * Changes:
290 * Pedro Roque : Fast Retransmit/Recovery.
291 * Two receive queues.
292 * Retransmit queue handled by TCP.
293 * Better retransmit timer handling.
294 * New congestion avoidance.
295 * Header prediction.
296 * Variable renaming.
297 *
298 * Eric : Fast Retransmit.
299 * Randy Scott : MSS option defines.
300 * Eric Schenk : Fixes to slow start algorithm.
301 * Eric Schenk : Yet another double ACK bug.
302 * Eric Schenk : Delayed ACK bug fixes.
303 * Eric Schenk : Floyd style fast retrans war avoidance.
304 * David S. Miller : Don't allow zero congestion window.
305 * Eric Schenk : Fix retransmitter so that it sends
306 * next packet on ack of previous packet.
307 * Andi Kleen : Moved open_request checking here
308 * and process RSTs for open_requests.
309 * Andi Kleen : Better prune_queue, and other fixes.
310 * Andrey Savochkin: Fix RTT measurements in the presence of
311 * timestamps.
312 * Andrey Savochkin: Check sequence numbers correctly when
313 * removing SACKs due to in sequence incoming
314 * data segments.
315 * Andi Kleen: Make sure we never ack data there is not
316 * enough room for. Also make this condition
317 * a fatal error if it might still happen.
318 * Andi Kleen: Add tcp_measure_rcv_mss to make
319 * connections with MSS<min(MTU,ann. MSS)
320 * work without delayed acks.
321 * Andi Kleen: Process packets with PSH set in the
322 * fast path.
323 * J Hadi Salim: ECN support
324 * Andrei Gurtov,
325 * Pasi Sarolahti,
326 * Panu Kuhlberg: Experimental audit of TCP (re)transmission
327 * engine. Lots of bugs are found.
328 * Pasi Sarolahti: F-RTO for dealing with spurious RTOs
329 *
330 * NewIP INET
331 * An implementation of the TCP/IP protocol suite for the LINUX
332 * operating system. NewIP INET is implemented using the BSD Socket
333 * interface as the means of communication with the user level.
334 *
335 * Implementation of the Transmission Control Protocol(TCP).
336 *
337 * TCP over NewIP
338 *
339 * Description of States:
340 *
341 * TCP_SYN_SENT sent a connection request, waiting for ack
342 *
343 * TCP_SYN_RECV received a connection request, sent ack,
344 * waiting for final ack in three-way handshake.
345 *
346 * TCP_ESTABLISHED connection established
347 *
348 * TCP_FIN_WAIT1 our side has shutdown, waiting to complete
349 * transmission of remaining buffered data
350 *
351 * TCP_FIN_WAIT2 all buffered data sent, waiting for remote
352 * to shutdown
353 *
354 * TCP_CLOSING both sides have shutdown but we still have
355 * data we have to finish sending
356 *
357 * TCP_TIME_WAIT timeout to catch resent junk before entering
358 * closed, can only be entered from FIN_WAIT2
359 * or CLOSING. Required because the other end
360 * may not have gotten our last ACK causing it
361 * to retransmit the data packet (which we ignore)
362 *
363 * TCP_CLOSE_WAIT remote side has shutdown and is waiting for
364 * us to finish writing our data and to shutdown
365 * (we have to close() to move on to LAST_ACK)
366 *
367 * TCP_LAST_ACK our side has shutdown after remote has
368 * shutdown. There may still be data in our
369 * buffer that we have to finish sending
370 *
371 * TCP_CLOSE socket is finished
372 */
373 #define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__
374
375 #include <linux/module.h>
376 #include <linux/errno.h>
377 #include <linux/types.h>
378 #include <linux/socket.h>
379 #include <linux/sockios.h>
380 #include <linux/net.h>
381 #include <linux/jiffies.h>
382 #include <linux/netdevice.h>
383 #include <linux/init.h>
384 #include <linux/jhash.h>
385 #include <linux/times.h>
386 #include <linux/random.h>
387 #include <linux/seq_file.h>
388
389 #include <net/tcp.h>
390 #include <net/ninet_hashtables.h>
391 #include <net/ninet_connection_sock.h>
392 #include <net/protocol.h>
393 #include <net/dsfield.h>
394 #include <net/timewait_sock.h>
395 #include <net/inet_common.h>
396 #include <net/secure_seq.h>
397 #include <net/nip.h>
398 #include <net/tcp_nip.h>
399 #include <net/nip_addrconf.h>
400 #include <net/nip_route.h>
401 #include <linux/nip.h>
402 #include "nip_checksum.h"
403 #include "nip_hdr.h"
404 #include "tcp_nip_parameter.h"
405
406 #define tcp_header_length(th) ((th)->doff << 2)
407 #define TCP_ACK_NUM_MULTIPLIER 20
408 #define TCP_WINDOW_RAISE_THRESHOLD 2
409 #define TCP_BACKLOG_HEADROOM (64 * 1024)
410 #define BYTES_PER_TCP_HEADER 4
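/*
 * Illustrative sketch (helper name is hypothetical, not used by the code
 * below): tcp_header_length() converts the 4-bit TCP "data offset" field,
 * which counts 32-bit words, into a byte length. For example doff = 5
 * (no options) gives 5 << 2 = 20 bytes.
 */
static inline unsigned int tcp_nip_hdr_len_example(const struct tcphdr *th)
{
	/* same arithmetic as tcp_header_length(): words * BYTES_PER_TCP_HEADER */
	return (unsigned int)th->doff * BYTES_PER_TCP_HEADER;
}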
411
412 static const struct inet_connection_sock_af_ops newip_specific;
413
414 static void tcp_nip_push(struct sock *sk, int flags, int mss_now,
415 int nonagle, int size_goal)
416 {
417 __tcp_nip_push_pending_frames(sk, mss_now, nonagle);
418 }
419
420 static const unsigned char new_state[16] = {
421 /* current state: new state: action: */
422 [0] = TCP_CLOSE,
423 [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
424 [TCP_SYN_SENT] = TCP_CLOSE,
425 [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
426 [TCP_FIN_WAIT1] = TCP_FIN_WAIT1,
427 [TCP_FIN_WAIT2] = TCP_FIN_WAIT2,
428 [TCP_TIME_WAIT] = TCP_CLOSE,
429 [TCP_CLOSE] = TCP_CLOSE,
430 [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN,
431 [TCP_LAST_ACK] = TCP_LAST_ACK,
432 [TCP_LISTEN] = TCP_CLOSE,
433 [TCP_CLOSING] = TCP_CLOSING,
434 [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
435 };
436
437 bool nip_get_tcp_input_checksum(struct sk_buff *skb)
438 {
439 struct nip_pseudo_header nph = {0};
440
441 nph.nexthdr = nipcb(skb)->nexthdr;
442 nph.saddr = nipcb(skb)->srcaddr;
443 nph.daddr = nipcb(skb)->dstaddr;
444
445 nph.check_len = htons(skb->len);
446 return nip_check_sum_parse(skb_transport_header(skb),
447 skb->len, &nph)
448 == 0xffff;
449 }
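/*
 * Minimal usage sketch (assumption: a NewIP TCP receive-path caller; the
 * helper name is hypothetical and it is not called in this file): drop the
 * segment when the pseudo-header checksum does not verify.
 */
static inline bool nip_tcp_csum_ok_example(struct sk_buff *skb)
{
	if (!nip_get_tcp_input_checksum(skb)) {
		nip_dbg("tcp checksum failed, drop skb");
		return false;
	}
	return true;
}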
450
451 static int tcp_nip_close_state(struct sock *sk)
452 {
453 int next;
454 int ns;
455
456 if (sk->sk_state >= TCP_MAX_STATES)
457 return TCP_ACTION_FIN;
458
459 next = (int)new_state[sk->sk_state];
460 ns = next & TCP_STATE_MASK;
461 tcp_set_state(sk, ns);
462
463 return next & TCP_ACTION_FIN;
464 }
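/*
 * Worked example (illustrative): for sk_state == TCP_ESTABLISHED the table
 * entry above is TCP_FIN_WAIT1 | TCP_ACTION_FIN, so tcp_nip_close_state()
 * moves the socket to FIN_WAIT1 and returns the non-zero TCP_ACTION_FIN
 * bit, telling the caller that a FIN must be sent.
 */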
465
466 void sk_nip_stream_kill_queues(struct sock *sk)
467 {
468 /* First the read buffer. */
469 __skb_queue_purge(&sk->sk_receive_queue);
470
471 /* Next, the error queue. */
472 __skb_queue_purge(&sk->sk_error_queue);
473
474 /* Next, the write queue. */
475 WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
476
477 WARN_ON(sk->sk_wmem_queued);
478 }
479
480 void tcp_nip_shutdown(struct sock *sk, int how)
481 {
482 if (!(how & SEND_SHUTDOWN))
483 return;
484
485 /* If we've already sent a FIN, or it's a closed state, skip this. */
486 if ((1 << sk->sk_state) &
487 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
488 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
489 /* Clear out any half completed packets. FIN if needed. */
490 if (tcp_nip_close_state(sk))
491 tcp_nip_send_fin(sk);
492 }
493 }
494
495 void tcp_nip_close(struct sock *sk, long timeout)
496 {
497 struct sk_buff *skb;
498 int data_was_unread = 0;
499 int state;
500 u32 sk_ack_backlog;
501
502 lock_sock(sk);
503 sk->sk_shutdown = SHUTDOWN_MASK;
504
505 nip_dbg("sk_state:%d", sk->sk_state);
506
507 if (sk->sk_state == TCP_LISTEN) {
508 tcp_set_state(sk, TCP_CLOSE);
509
510 sk_ack_backlog = READ_ONCE(sk->sk_ack_backlog);
511 inet_csk_listen_stop(sk);
512 nip_dbg("sk_state CLOSE, sk_ack_backlog=%u to %u, sk_max_ack_backlog=%u",
513 sk_ack_backlog, READ_ONCE(sk->sk_ack_backlog),
514 READ_ONCE(sk->sk_max_ack_backlog));
515 goto adjudge_to_death;
516 }
517
518 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
519 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
520
521 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
522 len--;
523 data_was_unread += len;
524 __kfree_skb(skb);
525 }
526
527 if (sk->sk_state == TCP_CLOSE)
528 goto adjudge_to_death;
529
530 if (data_was_unread) {
531 tcp_set_state(sk, TCP_CLOSE);
532 nip_sock_debug(sk, __func__);
533 tcp_nip_send_active_reset(sk, sk->sk_allocation);
534 } else if (tcp_nip_close_state(sk)) {
535 /* RED-PEN. Formally speaking, we have broken TCP state
536 * machine. State transitions:
537 *
538 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
539 * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
540 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
541 */
542 nip_dbg("ready to send fin, sk_state=%d", sk->sk_state);
543 nip_sock_debug(sk, __func__);
544 tcp_nip_send_fin(sk);
545 }
546
547 adjudge_to_death:
548 state = sk->sk_state;
549 sock_hold(sk);
550 sock_orphan(sk);
551
552 /* It is the last release_sock in its life. It will remove backlog. */
553 release_sock(sk);
554
555 local_bh_disable();
556 bh_lock_sock(sk);
557 WARN_ON(sock_owned_by_user(sk));
558
559 this_cpu_dec(*sk->sk_prot->orphan_count);
560
561 if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
562 goto out;
563
564 if (sk->sk_state == TCP_CLOSE)
565 inet_csk_destroy_sock(sk);
566
567 out:
568 bh_unlock_sock(sk);
569 local_bh_enable();
570 sock_put(sk);
571 }
572
573 /* These states need RST on ABORT according to RFC793 */
574 static inline bool tcp_nip_need_reset(int state)
575 {
576 return (1 << state) &
577 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
578 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
579 }
580
581 /* Function
582 * Initialize some of the parameters in request_sock
583 * Parameter
584 * req: Request connection control block
585 * sk_listener: Transmission control block
586 * skb: Transfer control block buffer
587 */
588 static void tcp_nip_init_req(struct request_sock *req,
589 const struct sock *sk_listener,
590 struct sk_buff *skb)
591 {
592 struct inet_request_sock *ireq = inet_rsk(req);
593
594 ireq->IR_NIP_RMT_ADDR = nipcb(skb)->srcaddr;
595 ireq->IR_NIP_LOC_ADDR = nipcb(skb)->dstaddr;
596 }
597
598 /* Function
599 * Initialize the initial sequence number (ISN). The server's ISN is derived from
600 * the source address and source port together with the destination address and
601 * destination port of the incoming SYN
602 * Parameter
603 * skb: Transfer control block buffer
604 */
605 static __u32 tcp_nip_init_sequence(const struct sk_buff *skb)
606 {
607 return secure_tcp_nip_sequence_number(nipcb(skb)->dstaddr.NIP_ADDR_FIELD32,
608 nipcb(skb)->srcaddr.NIP_ADDR_FIELD32,
609 tcp_hdr(skb)->dest,
610 tcp_hdr(skb)->source);
611 }
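/*
 * Sketch of the client-side counterpart (illustrative helper, not called
 * here): tcp_nip_connect() below derives its ISN from the mirrored
 * four-tuple, so each direction gets an independent sequence space.
 */
static inline __u32 tcp_nip_client_isn_example(const struct sock *sk, __be16 dport)
{
	return secure_tcp_nip_sequence_number(sk->SK_NIP_RCV_SADDR.NIP_ADDR_FIELD32,
					      sk->SK_NIP_DADDR.NIP_ADDR_FIELD32,
					      inet_sk(sk)->inet_sport, dport);
}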
612
613 static struct dst_entry *tcp_nip_route_req(const struct sock *sk,
614 struct flowi *fl,
615 const struct request_sock *req)
616 {
617 struct dst_entry *dst;
618 struct inet_request_sock *ireq = inet_rsk(req);
619 struct flow_nip fln;
620
621 fln.daddr = ireq->IR_NIP_RMT_ADDR;
622 dst = nip_route_output(sock_net(sk), sk, &fln);
623 return dst;
624 }
625
626 /* Function
627 * Used by the client transport layer to initiate a connection request.
628 * Resolves the route and sets the source address, destination address and interface
629 * Parameter
630 * sk: Transmission control block
631 * uaddr: The destination address
632 * addr_len: Destination address length
633 */
634 static int tcp_nip_connect(struct sock *sk, struct sockaddr *uaddr,
635 int addr_len)
636 {
637 struct sockaddr_nin *usin = (struct sockaddr_nin *)uaddr;
638 struct inet_sock *inet = inet_sk(sk);
639 struct tcp_sock *tp = tcp_sk(sk);
640 __be16 orig_dport;
641 struct nip_addr *daddr;
642 struct dst_entry *dst;
643 int err;
644 struct ip_options_rcu *inet_opt;
645 struct inet_timewait_death_row *tcp_death_row;
646 struct flow_nip fln;
647
648 fln.daddr = usin->sin_addr;
649
650 if (addr_len < sizeof(struct sockaddr_nin))
651 return -EINVAL;
652
653 if (usin->sin_family != AF_NINET)
654 return -EAFNOSUPPORT;
655
656 inet_opt = rcu_dereference_protected(inet->inet_opt,
657 lockdep_sock_is_held(sk));
658 /* Destination ADDRESS and port */
659 daddr = &usin->sin_addr;
660 orig_dport = usin->sin_port;
661
662 /* Find the route and obtain the source address */
663 nip_dbg("sk->sk_bound_dev_if is %d", sk->sk_bound_dev_if);
664 fln.FLOWIN_OIF = sk->sk_bound_dev_if;
665 dst = nip_dst_lookup_flow(sock_net(sk), sk, &fln, NULL);
666 if (IS_ERR(dst)) {
667 nip_dbg("cannot find dst");
668 err = PTR_ERR(dst);
669 goto failure;
670 }
671
672 /* find the actual source addr for sk->SK_NIP_RCV_SADDR */
673 if (nip_addr_eq(&sk->SK_NIP_RCV_SADDR, &nip_any_addr))
674 sk->SK_NIP_RCV_SADDR = fln.saddr;
675 fln.saddr = sk->SK_NIP_RCV_SADDR;
676
677 if (nip_addr_invalid(&fln.daddr)) {
678 nip_dbg("nip daddr invalid, bitlen=%u", fln.daddr.bitlen);
679 err = -EFAULT;
680 goto failure;
681 }
682
683 if (nip_addr_invalid(&fln.saddr)) {
684 nip_dbg("nip saddr invalid, bitlen=%u", fln.saddr.bitlen);
685 err = -EFAULT;
686 goto failure;
687 }
688
689 /* The destination address and port are set to the transport control block */
690 inet->inet_dport = usin->sin_port;
691 sk->SK_NIP_DADDR = usin->sin_addr;
692
693 inet_csk(sk)->icsk_ext_hdr_len = 0;
694 if (inet_opt)
695 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
696
697 tcp_set_state(sk, TCP_SYN_SENT);
698 sk_set_txhash(sk);
699 sk_dst_set(sk, dst);
700
701 /* Dynamically bind local ports */
702 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
703 err = ninet_hash_connect(tcp_death_row, sk);
704 if (err)
705 goto late_failure;
706
707 /* Check whether the transmission control block has been used (connected) before */
708 if (tp->rx_opt.ts_recent_stamp) {
709 /* Reset inherited state */
710 tp->rx_opt.ts_recent = 0;
711 tp->rx_opt.ts_recent_stamp = 0;
712 if (likely(!tp->repair))
713 tp->write_seq = 0;
714 }
715
716 if (!tp->write_seq)
717 tp->write_seq =
718 secure_tcp_nip_sequence_number(sk->SK_NIP_RCV_SADDR.NIP_ADDR_FIELD32,
719 sk->SK_NIP_DADDR.NIP_ADDR_FIELD32,
720 inet->inet_sport,
721 usin->sin_port);
722
723 inet->inet_id = prandom_u32();
724
725 /* Call tcp_connect to send the SYN field */
726 err = __tcp_nip_connect(sk);
727 if (err)
728 goto late_failure;
729 nip_sock_debug(sk, __func__);
730 return 0;
731
732 /* failure after tcp_set_state(sk, TCP_SYN_SENT) */
733 late_failure:
734 tcp_set_state(sk, TCP_CLOSE);
735 failure:
736 nip_sock_debug_output(&usin->sin_addr, &sk->SK_NIP_RCV_SADDR,
737 usin->sin_port, inet->inet_sport, __func__);
738 sk->sk_route_caps = 0;
739 inet->inet_dport = 0;
740 return err;
741 }
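/*
 * Caller-side sketch (assumption: an in-kernel user of the NewIP stack; the
 * helper name is hypothetical). It shows how AF_NINET and struct
 * sockaddr_nin, as consumed by tcp_nip_connect() above, fit together.
 */
static inline int tcp_nip_connect_example(struct socket *sock,
					  const struct nip_addr *daddr, __be16 dport)
{
	struct sockaddr_nin snin = {0};

	snin.sin_family = AF_NINET;
	snin.sin_port = dport;
	snin.sin_addr = *daddr;

	/* ends up in tcp_nip_connect() via the AF_NINET proto ops */
	return kernel_connect(sock, (struct sockaddr *)&snin, sizeof(snin), 0);
}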
742
743 static void tcp_nip_send_reset(struct sock *sk, struct sk_buff *skb)
744 {
745 const struct tcphdr *th = tcp_hdr(skb);
746 u32 seq = 0;
747 u32 ack_seq = 0;
748 u32 priority = gfp_any();
749
750 /* Never send a reset in response to a reset. */
751 if (th->rst)
752 return;
753
754 nip_dbg("send rst");
755 if (th->ack)
756 seq = ntohl(th->ack_seq);
757 else
758 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
759 tcp_header_length(th);
760
761 tcp_nip_actual_send_reset(sk, skb, seq, ack_seq, 0, 1, priority);
762 }
763
764 /* Function
765 * Function used by the server to send SYN+ACK segments
766 * Parameter
767 * sk: Transmission control block
768 * dst: routing
769 * flowi: Flow control block
770 * req: Request connection control block
771 * foc: Fast open options
772 * synack_type: Type of the SYN+ACK segment
773 */
774 static int tcp_nip_send_synack(const struct sock *sk, struct dst_entry *dst,
775 struct flowi *fl,
776 struct request_sock *req,
777 struct tcp_fastopen_cookie *foc,
778 enum tcp_synack_type synack_type,
779 struct sk_buff *syn_skb)
780 {
781 struct sk_buff *skb;
782 int err = -ENOMEM;
783
784 skb = tcp_nip_make_synack(sk, dst, req, foc, synack_type);
785 if (skb) {
786 nip_dbg("TCP server create SYN+ACK skb successfully");
787 rcu_read_lock();
788 err = nip_send_synack(req, skb);
789 rcu_read_unlock();
790 }
791
792 return err;
793 }
794
795 static void tcp_nip_reqsk_destructor(struct request_sock *req)
796 {
797 ;
798 }
799
800 static void tcp_nip_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
801 struct request_sock *req)
802 {
803 }
804
805 static void tcp_nip_reqsk_send_reset(const struct sock *sk, struct sk_buff *skb)
806 {
807 }
808
809 static void tcp_nip_reqsk_syn_ack_timeout(const struct request_sock *req)
810 {
811 }
812
813 struct request_sock_ops tcp_nip_request_sock_ops __read_mostly = {
814 .family = AF_NINET,
815 .obj_size = sizeof(struct tcp_nip_request_sock),
816 .rtx_syn_ack = tcp_nip_rtx_synack,
817 .send_ack = tcp_nip_reqsk_send_ack,
818 .destructor = tcp_nip_reqsk_destructor,
819 .send_reset = tcp_nip_reqsk_send_reset,
820 .syn_ack_timeout = tcp_nip_reqsk_syn_ack_timeout,
821 };
822
823 #ifdef CONFIG_TCP_MD5SIG
824 static int nip_calc_md5_hash(char *location, const struct tcp_md5sig_key *md5,
825 const struct sock *sk, const struct sk_buff *skb)
826 {
827 return -EINVAL;
828 }
829
830 static struct tcp_md5sig_key *nip_req_md5_lookup(const struct sock *sk,
831 const struct sock *addr_sk)
832 {
833 return NULL;
834 }
835 #endif
836
837 #ifdef CONFIG_SYN_COOKIES
838 static __u32 nip_cookie_init_seq(const struct sk_buff *skb, __u16 *mss)
839 {
840 return 0;
841 }
842 #endif
843
844 static u32 tcp_nip_init_ts_off(const struct net *net, const struct sk_buff *skb)
845 {
846 return 0;
847 }
848
849 static const struct tcp_request_sock_ops tcp_request_sock_newip_ops = {
850 .mss_clamp = TCP_BASE_MSS,
851 #ifdef CONFIG_TCP_MD5SIG
852 .req_md5_lookup = nip_req_md5_lookup,
853 .calc_md5_hash = nip_calc_md5_hash,
854 #endif
855 .init_req = tcp_nip_init_req,
856 #ifdef CONFIG_SYN_COOKIES
857 .cookie_init_seq = nip_cookie_init_seq,
858 #endif
859 .route_req = tcp_nip_route_req,
860 .init_seq = tcp_nip_init_sequence,
861 .send_synack = tcp_nip_send_synack,
862 .init_ts_off = tcp_nip_init_ts_off,
863 };
864
865 /* Function
866 * Save the route cache (dst) carried by the received SKB into the transmission control block
867 * Parameter
868 * sk: Transmission control block
869 * skb: Transfer control block buffer
873 */
874 void ninet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
875 {
876 struct dst_entry *dst = skb_dst(skb);
877
878 if (dst && dst_hold_safe(dst)) {
879 rcu_assign_pointer(sk->sk_rx_dst, dst);
880 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
881 }
882 }
883
884 /* Function
885 * A function used by the server to process client connection requests
886 * Parameter
887 * sk: Transmission control block
888 * skb: Transfer control block buffer
889 */
890 static int tcp_nip_conn_request(struct sock *sk, struct sk_buff *skb)
891 {
892 return _tcp_nip_conn_request(&tcp_nip_request_sock_ops,
893 &tcp_request_sock_newip_ops, sk, skb);
894 }
895
896 /* Function
897 * Create child control blocks
898 * Parameter
899 * sk: Transmission control block
900 * skb: Transfer control block buffer
901 * req: Request connection control block
902 * dst: routing
903 * req_unhash: Request connection control block
904 */
905 static struct sock *tcp_nip_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
906 struct request_sock *req,
907 struct dst_entry *dst,
908 struct request_sock *req_unhash,
909 bool *own_req)
910 {
911 struct tcp_nip_request_sock *niptreq = tcp_nip_rsk(req);
912 struct inet_request_sock *ireq = inet_rsk(req);
913 bool found_dup_sk = false;
914 struct tcp_nip_sock *newtcpnipsk;
915 struct inet_sock *newinet;
916 struct tcp_sock *newtp;
917 struct sock *newsk;
918 struct flow_nip fln;
919
920 if (sk_acceptq_is_full(sk))
921 goto out_overflow;
922
923 fln.daddr = ireq->IR_NIP_RMT_ADDR;
924 if (!dst) {
925 dst = nip_route_output(sock_net(sk), sk, &fln);
926 if (!dst)
927 goto out;
928 }
929
930 newsk = tcp_nip_create_openreq_child(sk, req, skb);
931 if (!newsk)
932 goto out_nonewsk;
933
934 /* Save the received route cache */
935 ninet_sk_rx_dst_set(newsk, skb);
936
937 newtcpnipsk = (struct tcp_nip_sock *)newsk;
938 newtcpnipsk->common = niptreq->common;
939
940 newtp = tcp_sk(newsk);
941 newinet = inet_sk(newsk);
942
943 newsk->SK_NIP_DADDR = ireq->IR_NIP_RMT_ADDR;
944 newsk->SK_NIP_RCV_SADDR = ireq->IR_NIP_LOC_ADDR;
945
946 newinet->inet_opt = NULL;
947
948 inet_csk(newsk)->icsk_ext_hdr_len = 0;
949
950 newtp->retrans_stamp = jiffies;
951
952 /* Negotiate MSS */
953 newtp->mss_cache = TCP_BASE_MSS;
954 newtp->out_of_order_queue = RB_ROOT;
955 newtp->advmss = dst_metric_advmss(dst);
956 if (tcp_sk(sk)->rx_opt.user_mss &&
957 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
958 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
959
960 tcp_nip_initialize_rcv_mss(newsk);
961 if (__inet_inherit_port(sk, newsk) < 0)
962 goto put_and_exit;
963 /* If deleting the old sock from the ehash table and adding the new sock to the
964 * ehash table succeeds, *own_req is set to true
965 */
966 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
967 &found_dup_sk);
968
969 /* newip newsk doesn't save this dst. release it. */
970 dst_release(dst);
971 return newsk;
972
973 out_overflow:
974 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
975 out_nonewsk:
976 out:
977 /* newip newsk doesn't save this dst. release it. */
978 dst_release(dst);
979 tcp_listendrop(sk);
980 return NULL;
981 put_and_exit:
982 newinet->inet_opt = NULL;
983 inet_csk_prepare_forced_close(newsk);
984 tcp_nip_done(newsk);
985 goto out;
986 }
987
988 static void tcp_nip_send_check(struct sock *sk, struct sk_buff *skb)
989 {
990 }
991
992 static int tcp_nip_rebuild_header(struct sock *sk)
993 {
994 return -EINVAL;
995 }
996
997 static void nip_addr2sockaddr(struct sock *sk, struct sockaddr *addr)
998 {
999 }
1000
1001 static void nip_mtu_reduced(struct sock *sk)
1002 {
1003 }
1004
1005 static const struct inet_connection_sock_af_ops newip_specific = {
1006 .queue_xmit = tcp_nip_queue_xmit,
1007 .send_check = tcp_nip_send_check,
1008 .rebuild_header = tcp_nip_rebuild_header,
1009 .sk_rx_dst_set = ninet_sk_rx_dst_set,
1010 .conn_request = tcp_nip_conn_request,
1011 .syn_recv_sock = tcp_nip_syn_recv_sock,
1012 .net_header_len = 0,
1013 .net_frag_header_len = 0,
1014 .setsockopt = nip_setsockopt,
1015 .getsockopt = nip_getsockopt,
1016 .addr2sockaddr = nip_addr2sockaddr,
1017 .sockaddr_len = sizeof(struct sockaddr_nin),
1018 .mtu_reduced = nip_mtu_reduced,
1019 };
1020
1021 #if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE)
1022 #define MAX_NIP_TCP_KEEPIDLE 32767
1023 #define MAX_NIP_TCP_KEEPINTVL 32767
1024 #define MAX_NIP_TCP_KEEPCNT 255
1025 static int tcp_nip_keepalive_para_update(struct sock *sk,
1026 u32 keepalive_time,
1027 u32 keepalive_intvl,
1028 u8 keepalive_probes)
1029 {
1030 int val;
1031 struct tcp_sock *tp = tcp_sk(sk);
1032
1033 /* set keep idle (TCP_KEEPIDLE) */
1034 val = keepalive_time;
1035 if (val < 1 || val > MAX_NIP_TCP_KEEPIDLE) {
1036 nip_dbg("keepalive_time(%u) invalid", val);
1037 return -EINVAL;
1038 }
1039
1040 tp->keepalive_time = val;
1041 if (sock_flag(sk, SOCK_KEEPOPEN) &&
1042 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1043 u32 elapsed = keepalive_time_elapsed(tp);
1044
1045 if (tp->keepalive_time > elapsed)
1046 elapsed = tp->keepalive_time - elapsed;
1047 else
1048 elapsed = 0;
1049 inet_csk_reset_keepalive_timer(sk, elapsed);
1050 }
1051
1052 /* set keep intvl (TCP_KEEPINTVL) */
1053 val = keepalive_intvl;
1054 if (val < 1 || val > MAX_NIP_TCP_KEEPINTVL) {
1055 nip_dbg("keepalive_intvl(%u) invalid", val);
1056 return -EINVAL;
1057 }
1058 tp->keepalive_intvl = val;
1059
1060 /* set keep cnt (TCP_KEEPCNT) */
1061 val = keepalive_probes;
1062 if (val < 1 || val > MAX_NIP_TCP_KEEPCNT) {
1063 nip_dbg("keepalive_probes(%u) invalid", val);
1064 return -EINVAL;
1065 }
1066 tp->keepalive_probes = val;
1067
1068 /* enable keepalive (SO_KEEPALIVE) */
1069 if (sk->sk_prot->keepalive) {
1070 sk->sk_prot->keepalive(sk, 1);
1071 sock_valbool_flag(sk, SOCK_KEEPOPEN, 1);
1072 } else {
1073 nip_dbg("keepalive func is null");
1074 }
1075
1076 return 0;
1077 }
1078 #endif
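/*
 * For reference (hedged sketch, not part of this file's logic): the fast
 * keepalive update above mirrors what an application would request from
 * user space with SO_KEEPALIVE plus the TCP_KEEPIDLE / TCP_KEEPINTVL /
 * TCP_KEEPCNT socket options, e.g.
 *
 *	int on = 1, idle = 30, intvl = 5, cnt = 3;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
 */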
1079
1080 #define NIP_PKT_TOTAL_LEN_BOUNDARY 100000 // 100K
1081 #define NIP_KEEPALIVE_PROBES 255
1082 void tcp_nip_keepalive_enable(struct sock *sk)
1083 {
1084 #if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE)
1085 int ret;
1086 struct tcp_sock *tp = tcp_sk(sk);
1087 struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
1088 struct sk_buff *skb = tcp_nip_send_head(sk);
1089
1090 if (!skb)
1091 return;
1092
1093 if (ntp->nip_keepalive_enable) {
1094 /* If keepalive set by setsockopt, backup para and change para to nip para */
1095 if (tp->keepalive_time > HZ) {
1096 ntp->keepalive_time_bak = tp->keepalive_time;
1097 ntp->keepalive_probes_bak = tp->keepalive_probes;
1098 ntp->keepalive_intvl_bak = tp->keepalive_intvl;
1099
1100 nip_dbg("HZ=%u, change time/probes/intvl [%u, %u, %u] to [%u, %u, %u]",
1101 HZ, tp->keepalive_time, tp->keepalive_probes,
1102 tp->keepalive_intvl, get_nip_keepalive_time(),
1103 NIP_KEEPALIVE_PROBES, get_nip_keepalive_intvl());
1104
1105 tp->keepalive_time = get_nip_keepalive_time();
1106 tp->keepalive_probes = NIP_KEEPALIVE_PROBES;
1107 tp->keepalive_intvl = get_nip_keepalive_intvl();
1108 inet_csk_reset_keepalive_timer(sk, tp->keepalive_time);
1109 }
1110 return;
1111 }
1112
1113 /* If keepalive set by setsockopt, backup para */
1114 if (sock_flag(sk, SOCK_KEEPOPEN)) {
1115 ntp->keepalive_time_bak = tp->keepalive_time;
1116 ntp->keepalive_probes_bak = tp->keepalive_probes;
1117 ntp->keepalive_intvl_bak = tp->keepalive_intvl;
1118 nip_dbg("HZ=%u, backup normal time/probes/intvl [%u, %u, %u]",
1119 HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl);
1120 }
1121
1122 /* change para to nip para */
1123 ret = tcp_nip_keepalive_para_update(sk, get_nip_keepalive_time(),
1124 get_nip_keepalive_intvl(),
1125 NIP_KEEPALIVE_PROBES);
1126 if (ret != 0) {
1127 nip_dbg("fail, HZ=%u, time/probes/intvl [%u, %u, %u]",
1128 HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl);
1129 return;
1130 }
1131
1132 nip_dbg("ok, HZ=%u, time/probes/intvl [%u, %u, %u]",
1133 HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl);
1134 ntp->nip_keepalive_enable = true;
1135 #endif
1136 }
1137
1138 void tcp_nip_keepalive_disable(struct sock *sk)
1139 {
1140 #if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE)
1141 struct tcp_sock *tp = tcp_sk(sk);
1142 struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
1143
1144 if (!ntp->nip_keepalive_enable)
1145 return;
1146
1147 if (!sock_flag(sk, SOCK_KEEPOPEN)) {
1148 ntp->nip_keepalive_enable = false;
1149 nip_dbg("ok, HZ=%u, normal ka has disable", HZ);
1150 return;
1151 }
1152
1153 if (ntp->idle_ka_probes_out < get_nip_idle_ka_probes_out())
1154 return;
1155
1156 /* newip keepalive change to normal keepalive */
1157 if (ntp->keepalive_time_bak) {
1158 nip_dbg("HZ=%u, change normal time/probes/intvl [%u, %u, %u] to [%u, %u, %u]",
1159 HZ, tp->keepalive_time, tp->keepalive_probes,
1160 tp->keepalive_intvl, ntp->keepalive_time_bak, ntp->keepalive_probes_bak,
1161 ntp->keepalive_intvl_bak);
1162 tp->keepalive_time = ntp->keepalive_time_bak;
1163 tp->keepalive_probes = ntp->keepalive_probes_bak;
1164 tp->keepalive_intvl = ntp->keepalive_intvl_bak;
1165 inet_csk_reset_keepalive_timer(sk, tp->keepalive_time);
1166 return;
1167 }
1168
1169 ntp->keepalive_time_bak = 0;
1170 ntp->keepalive_probes_bak = 0;
1171 ntp->keepalive_intvl_bak = 0;
1172
1173 /* enable keepalive (SO_KEEPALIVE) */
1174 if (sk->sk_prot->keepalive)
1175 sk->sk_prot->keepalive(sk, 0);
1176 sock_valbool_flag(sk, SOCK_KEEPOPEN, 0);
1177
1178 nip_dbg("ok, HZ=%u, idle_ka_probes_out=%u", HZ, get_nip_idle_ka_probes_out());
1179 ntp->nip_keepalive_enable = false;
1180 #endif
1181 }
1182
1183 static void _tcp_sock_priv_init(struct sock *sk)
1184 {
1185 struct tcp_sock *tp = tcp_sk(sk);
1186 struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
1187
1188 memset(ntp, 0, sizeof(*ntp));
1189 ntp->nip_ssthresh = get_nip_ssthresh_default();
1190 tp->sacked_out = 0;
1191 tp->rcv_tstamp = 0;
1192 tp->selective_acks[0].start_seq = 0;
1193 tp->selective_acks[0].end_seq = 0;
1194 tp->keepalive_time = 0;
1195 tp->keepalive_probes = 0;
1196 tp->keepalive_intvl = 0;
1197 }
1198
1199 static void tcp_sock_priv_init(struct sock *sk)
1200 {
1201 _tcp_sock_priv_init(sk);
1202 }
1203
1204 static void nip_icsk_ca_init(struct sock *sk)
1205 {
1206 }
1207
1208 static void nip_icsk_ca_release(struct sock *sk)
1209 {
1210 }
1211
1212 static u32 nip_icsk_ca_ssthresh(struct sock *sk)
1213 {
1214 return 0;
1215 }
1216
1217 static void nip_icsk_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
1218 {
1219 }
1220
1221 static void nip_icsk_ca_set_state(struct sock *sk, u8 new_state)
1222 {
1223 }
1224
1225 static void nip_icsk_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
1226 {
1227 }
1228
1229 static void nip_icsk_ca_in_ack_event(struct sock *sk, u32 flags)
1230 {
1231 }
1232
1233 static u32 nip_icsk_ca_undo_cwnd(struct sock *sk)
1234 {
1235 return 0;
1236 }
1237
1238 static void nip_icsk_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
1239 {
1240 }
1241
1242 static u32 nip_icsk_ca_min_tso_segs(struct sock *sk)
1243 {
1244 return 0;
1245 }
1246
1247 static u32 nip_icsk_ca_sndbuf_expand(struct sock *sk)
1248 {
1249 return 0;
1250 }
1251
1252 static void nip_icsk_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
1253 {
1254 }
1255
1256 static size_t nip_icsk_ca_get_info(struct sock *sk, u32 ext, int *attr,
1257 union tcp_cc_info *info)
1258 {
1259 return 0;
1260 }
1261
1262 static int nip_icsk_ulp_init(struct sock *sk)
1263 {
1264 return -EINVAL;
1265 }
1266
1267 static void nip_icsk_ulp_update(struct sock *sk, struct proto *p,
1268 void (*write_space)(struct sock *sk))
1269 {
1270 }
1271
1272 static void nip_icsk_ulp_release(struct sock *sk)
1273 {
1274 }
1275
1276 static int nip_icsk_ulp_get_info(const struct sock *sk, struct sk_buff *skb)
1277 {
1278 return -EINVAL;
1279 }
1280
1281 static size_t nip_icsk_ulp_get_info_size(const struct sock *sk)
1282 {
1283 return 0;
1284 }
1285
1286 static void nip_icsk_ulp_clone(const struct request_sock *req, struct sock *newsk,
1287 const gfp_t priority)
1288 {
1289 }
1290
1291 static struct module nip_owner;
1292
1293 struct tcp_ulp_ops nip_icsk_ulp_ops = {
1294 .init = nip_icsk_ulp_init,
1295 .update = nip_icsk_ulp_update,
1296 .release = nip_icsk_ulp_release,
1297 .get_info = nip_icsk_ulp_get_info,
1298 .get_info_size = nip_icsk_ulp_get_info_size,
1299 .clone = nip_icsk_ulp_clone,
1300 .owner = &nip_owner,
1301 };
1302
1303 struct tcp_congestion_ops nip_icsk_ca_ops = {
1304 .init = nip_icsk_ca_init,
1305 .release = nip_icsk_ca_release,
1306 .ssthresh = nip_icsk_ca_ssthresh,
1307 .cong_avoid = nip_icsk_ca_cong_avoid,
1308 .set_state = nip_icsk_ca_set_state,
1309 .cwnd_event = nip_icsk_ca_cwnd_event,
1310 .in_ack_event = nip_icsk_ca_in_ack_event,
1311 .undo_cwnd = nip_icsk_ca_undo_cwnd,
1312 .pkts_acked = nip_icsk_ca_pkts_acked,
1313 .min_tso_segs = nip_icsk_ca_min_tso_segs,
1314 .sndbuf_expand = nip_icsk_ca_sndbuf_expand,
1315 .cong_control = nip_icsk_ca_cong_control,
1316 .get_info = nip_icsk_ca_get_info,
1317 };
1318
1319 static void nip_icsk_clean_acked(struct sock *sk, u32 acked_seq)
1320 {
1321 }
1322
1323 static void inet_connection_sock_pre_init(struct inet_connection_sock *icsk)
1324 {
1325 icsk->icsk_ca_ops = &nip_icsk_ca_ops;
1326 icsk->icsk_ulp_ops = &nip_icsk_ulp_ops;
1327 icsk->icsk_clean_acked = nip_icsk_clean_acked;
1328 }
1329
1330 #ifdef CONFIG_TCP_MD5SIG
1331 struct tcp_md5sig_key *nip_md5_lookup(const struct sock *sk,
1332 const struct sock *addr_sk)
1333 {
1334 return NULL;
1335 }
1336
1337 int nip_md5_parse(struct sock *sk, int optname, sockptr_t optval,
1338 int optlen)
1339 {
1340 return -EINVAL;
1341 }
1342
1343 const struct tcp_sock_af_ops nip_af_specific = {
1344 .md5_lookup = nip_md5_lookup,
1345 .calc_md5_hash = nip_calc_md5_hash,
1346 .md5_parse = nip_md5_parse,
1347 };
1348
1349 struct tcp_md5sig_info __rcu nip_md5sig_info;
1350 #endif
1351
1352 static void tcp_sock_pre_init(struct tcp_sock *tp)
1353 {
1354 #ifdef CONFIG_TCP_MD5SIG
1355 tp->af_specific = &nip_af_specific;
1356 tp->md5sig_info = &nip_md5sig_info;
1357 #endif
1358 }
1359
1360 /* Function
1361 * Initialize the sock information in TCP
1362 * Parameter
1363 * sk: Sock to be initialized
1364 * Note: Currently, this function does not initialize timer, pre-queue, and congestion control,
1365 * and does not allow fast retransmission. No function is set to adjust MSS
1366 */
1367 static int tcp_nip_init_sock(struct sock *sk)
1368 {
1369 struct inet_connection_sock *icsk = inet_csk(sk);
1370 struct tcp_sock *tp = tcp_sk(sk);
1371
1372 tcp_sock_priv_init(sk);
1373
1374 tp->out_of_order_queue = RB_ROOT;
1375 tcp_nip_init_xmit_timers(sk);
1376 INIT_LIST_HEAD(&tp->tsq_node);
1377
1378 inet_connection_sock_pre_init(icsk);
1379 tcp_sock_pre_init(tp);
1380 icsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto());
1381 icsk->icsk_rto_min = TCP_RTO_MIN;
1382 icsk->icsk_delack_max = TCP_DELACK_MAX;
1383 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
1384 minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
1385
1386 tp->snd_cwnd = TCP_INIT_CWND;
1387 tp->app_limited = ~0U;
1388 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1389 tp->snd_cwnd_clamp = ~0;
1390 tp->mss_cache = TCP_MSS_DEFAULT;
1391
1392 tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
1393 tp->tsoffset = 0;
1394 sk->sk_state = TCP_CLOSE;
1395 sk->sk_write_space = sk_stream_write_space;
1396 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1397
1398 icsk->icsk_sync_mss = tcp_nip_sync_mss;
1399
1400 WRITE_ONCE(sk->sk_sndbuf, get_nip_sndbuf()); // sock_net(sk)->ipv4.sysctl_tcp_wmem[1]
1401 WRITE_ONCE(sk->sk_rcvbuf, get_nip_rcvbuf()); // sock_net(sk)->ipv4.sysctl_tcp_rmem[1]
1402
1403 local_bh_disable();
1404 sk_sockets_allocated_inc(sk);
1405 local_bh_enable();
1406
1407 icsk->icsk_af_ops = &newip_specific;
1408
1409 return 0;
1410 }
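/*
 * Numeric sketch (illustrative): with get_nip_rto() = 5, icsk_rto above
 * becomes HZ / 5 jiffies, i.e. 200 ms regardless of HZ; when get_nip_rto()
 * returns 0 the code falls back to the generic TCP_TIMEOUT_INIT.
 */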
1411
1412 static void skb_nip_entail(struct sock *sk, struct sk_buff *skb)
1413 {
1414 struct tcp_sock *tp = tcp_sk(sk);
1415 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1416
1417 skb->csum = 0;
1418 tcb->seq = tp->write_seq;
1419 tcb->end_seq = tp->write_seq;
1420 tcb->tcp_flags = TCPHDR_ACK;
1421 tcb->sacked = 0;
1422
1423 tcp_nip_add_write_queue_tail(sk, skb);
1424
1425 sk->sk_wmem_queued += skb->truesize;
1426 sk_mem_charge(sk, skb->truesize);
1427 }
1428
1429 static unsigned int tcp_nip_xmit_size_goal(struct sock *sk, u32 mss_now,
1430 int large_allowed)
1431 {
1432 struct tcp_sock *tp = tcp_sk(sk);
1433 u32 new_size_goal = NIP_MIN_MTU;
1434 u32 size_goal;
1435
1436 if (!large_allowed || !mss_now)
1437 return mss_now;
1438
1439 /* Note : tcp_tso_autosize() will eventually split this later */
1440 if (sk->sk_gso_max_size > MAX_TCP_HEADER + 1)
1441 new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
1442 new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
1443
1444 /* We try hard to avoid divides here */
1445 size_goal = tp->gso_segs * mss_now;
1446 if (unlikely(new_size_goal < size_goal ||
1447 new_size_goal >= size_goal + mss_now)) {
1448 tp->gso_segs = min_t(u16, new_size_goal / mss_now,
1449 sk->sk_gso_max_segs);
1450 size_goal = tp->gso_segs * mss_now;
1451 }
1452
1453 return max(size_goal, mss_now);
1454 }
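/*
 * Numeric sketch (illustrative): with mss_now = 1400 and a large
 * sk_gso_max_size, new_size_goal is about sk_gso_max_size - 1 -
 * MAX_TCP_HEADER (then bounded to half the window), gso_segs becomes
 * min(new_size_goal / 1400, sk_gso_max_segs), and the returned size_goal
 * is that many whole MSS-sized chunks, never less than one MSS.
 */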
1455
1456 int tcp_nip_send_mss(struct sock *sk, int *size_goal, int flags)
1457 {
1458 int mss_now;
1459
1460 mss_now = tcp_nip_current_mss(sk);
1461 *size_goal = tcp_nip_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
1462 return mss_now;
1463 }
1464
1465 int tcp_nip_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1466 {
1467 struct tcp_sock *tp = tcp_sk(sk);
1468 struct sk_buff *skb;
1469 int flags;
1470 int err;
1471 int copied = 0;
1472 int mss_now = 0;
1473 int size_goal;
1474 bool process_backlog = false;
1475 long timeo;
1476
1477 lock_sock(sk);
1478
1479 flags = msg->msg_flags;
1480
1481 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1482
1483 if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
1484 !tcp_passive_fastopen(sk)) {
1485 err = sk_stream_wait_connect(sk, &timeo);
1486 if (err != 0)
1487 goto do_error;
1488 }
1489
1490 /* This should be in poll */
1491 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1492
1493 copied = 0;
1494
1495 restart:
1496 mss_now = tcp_nip_send_mss(sk, &size_goal, flags);
1497
1498 nip_dbg("mss_now=%d", mss_now);
1499
1500 err = -EPIPE;
1501 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1502 goto do_error;
1503
1504 while (msg_data_left(msg)) {
1505 int copy = 0;
1506 int max = mss_now;
1507
1508 bool first_skb;
1509
1510 if (!sk_stream_memory_free(sk))
1511 goto wait_for_sndbuf;
1512
1513 if (process_backlog && sk_flush_backlog(sk)) {
1514 process_backlog = false;
1515 goto restart;
1516 }
1517 first_skb = skb_queue_empty(&sk->sk_write_queue);
1518 skb = sk_stream_alloc_skb(sk, mss_now, sk->sk_allocation, first_skb);
1519 if (!skb)
1520 goto wait_for_memory;
1521
1522 skb->tstamp = 0;
1523 process_backlog = true;
1524
1525 skb_nip_entail(sk, skb);
1526 copy = mss_now;
1527 max = mss_now;
1528
1529 /* Try to append data to the end of skb. */
1530 if (copy > msg_data_left(msg))
1531 copy = msg_data_left(msg);
1532
1533 if (skb_availroom(skb) > 0) {
1534 /* We have some space in skb head. Superb! */
1535 copy = min_t(int, copy, skb_availroom(skb));
1536 err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
1537 if (err)
1538 goto do_fault;
1539 } else {
1540 nip_dbg("msg too big, tcp cannot devide packet now");
1541 goto out;
1542 }
1543
1544 if (!copied)
1545 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1546
1547 tp->write_seq += copy;
1548 TCP_SKB_CB(skb)->end_seq += copy;
1549 tcp_skb_pcount_set(skb, 0);
1550 copied += copy;
1551 if (!msg_data_left(msg)) {
1552 if (unlikely(flags & MSG_EOR))
1553 TCP_SKB_CB(skb)->eor = 1;
1554 goto out;
1555 }
1556
1557 continue;
1558
1559 wait_for_sndbuf:
1560 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1561 wait_for_memory:
1562 if (copied)
1563 tcp_nip_push(sk, flags & ~MSG_MORE, mss_now,
1564 TCP_NAGLE_PUSH, size_goal);
1565
1566 err = sk_stream_wait_memory(sk, &timeo);
1567 if (err != 0)
1568 goto do_error;
1569
1570 mss_now = tcp_nip_send_mss(sk, &size_goal, flags);
1571 }
1572
1573 out:
1574 if (copied)
1575 tcp_nip_push(sk, flags, mss_now, tp->nonagle, size_goal);
1576 release_sock(sk);
1577 return copied;
1578
1579 do_fault:
1580 if (!skb->len) {
1581 tcp_nip_modify_send_head(sk, skb);
1582 tcp_unlink_write_queue(skb, sk);
1583 sk_wmem_free_skb(sk, skb);
1584 }
1585
1586 do_error:
1587 if (copied)
1588 goto out;
1589
1590 err = sk_stream_error(sk, flags, err);
1591 /* make sure we wake any epoll edge trigger waiter */
1592 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1593 sk->sk_write_space(sk);
1594 release_sock(sk);
1595 return err;
1596 }
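/*
 * Caller-side sketch (assumption: an in-kernel user with a connected
 * AF_NINET stream socket; the helper name is hypothetical). The call ends
 * up in tcp_nip_sendmsg() through the socket's proto ops.
 */
static inline int tcp_nip_send_example(struct socket *sock, void *buf, size_t len)
{
	struct kvec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

	return kernel_sendmsg(sock, &msg, &iov, 1, len);
}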
1597
1598 /* Clean up the receive buffer for full frames taken by the user,
1599 * then send an ACK if necessary. COPIED is the number of bytes
1600 * tcp_recvmsg has given to the user so far, it speeds up the
1601 * calculation of whether or not we must ACK for the sake of
1602 * a window update.
1603 */
1604 void tcp_nip_cleanup_rbuf(struct sock *sk, int copied)
1605 {
1606 struct tcp_sock *tp = tcp_sk(sk);
1607 bool time_to_ack = false;
1608
1609 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1610
1611 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
1612 "cleanup rbuf bug: copied %X seq %X rcvnxt %X",
1613 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
1614
1615 if (inet_csk_ack_scheduled(sk)) {
1616 const struct inet_connection_sock *icsk = inet_csk(sk);
1617
1618 if (tp->rcv_nxt - tp->rcv_wup > (get_ack_num() *
1619 TCP_ACK_NUM_MULTIPLIER * icsk->icsk_ack.rcv_mss) ||
1620 /* If this read emptied read buffer, we send ACK, if
1621 * connection is not bidirectional, user drained
1622 * receive buffer and there was a small segment
1623 * in queue.
1624 */
1625 (copied > 0 &&
1626 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
1627 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1628 !inet_csk_in_pingpong_mode(sk))) &&
1629 !atomic_read(&sk->sk_rmem_alloc)))
1630 time_to_ack = true;
1631 }
1632
1633 /* We send an ACK if we can now advertise a non-zero window
1634 * which has been raised "significantly".
1635 *
1636 * Even if window raised up to infinity, do not send window open ACK
1637 * in states, where we will not receive more. It is useless.
1638 */
1639 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1640 __u32 rcv_window_now = tcp_receive_window(tp);
1641
1642 /* Optimize, __nip_tcp_select_window() is not cheap. */
1643 if (TCP_WINDOW_RAISE_THRESHOLD * rcv_window_now <= tp->window_clamp) {
1644 __u32 new_window = __nip_tcp_select_window(sk);
1645
1646 /* Send ACK now, if this read freed lots of space
1647 * in our buffer. Certainly, new_window is new window.
1648 * We can advertise it now, if it is not less than current one.
1649 * "Lots" means "at least twice" here.
1650 */
1651 if (new_window && new_window >= TCP_WINDOW_RAISE_THRESHOLD * rcv_window_now)
1652 time_to_ack = true;
1653 }
1654 }
1655 if (time_to_ack)
1656 tcp_nip_send_ack(sk);
1657 }
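/*
 * Numeric sketch (illustrative): if icsk_ack.rcv_mss = 1400 and
 * get_ack_num() returns, say, 2, the force-ACK threshold above is
 * 2 * TCP_ACK_NUM_MULTIPLIER * 1400 = 56000 bytes of received,
 * not-yet-acked data; an ACK is also sent when the freed receive space
 * lets the advertised window at least double.
 */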
1658
1659 int tcp_nip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
1660 int flags, int *addr_len)
1661 {
1662 struct tcp_sock *tp = tcp_sk(sk);
1663 int copied = 0;
1664 u32 *seq;
1665 unsigned long used;
1666 int err = 0;
1667 int target;
1668 long timeo;
1669 size_t len_tmp = len;
1670 struct sk_buff *skb, *last;
1671
1672 lock_sock(sk);
1673
1674 if (sk->sk_state == TCP_LISTEN)
1675 goto out;
1676
1677 timeo = sock_rcvtimeo(sk, nonblock);
1678
1679 seq = &tp->copied_seq;
1680
1681 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len_tmp);
1682
1683 do {
1684 u32 offset;
1685 /* Next get a buffer. */
1686 last = skb_peek_tail(&sk->sk_receive_queue);
1687 skb_queue_walk(&sk->sk_receive_queue, skb) {
1688 last = skb;
1689 /* Now that we have two receive queues this
1690 * shouldn't happen.
1691 */
1692 if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
1693 "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X",
1694 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
1695 flags))
1696 break;
1697 offset = *seq - TCP_SKB_CB(skb)->seq;
1698 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
1699 pr_err_once("found a SYN, please report");
1700 offset--;
1701 }
1702 if (offset < skb->len)
1703 goto found_ok_skb;
1704 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1705 goto found_fin_ok;
1706 /* Reaching here means this skb has already been fully consumed;
1707 * that is only expected when MSG_PEEK is set in flags
1708 */
1709 WARN(!(flags & MSG_PEEK),
1710 "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X",
1711 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
1712 }
1713
1714 /* Reaching this point means the whole sk_receive_queue has been walked */
1715 /* Stop reading once the target is met and no backlog data is pending */
1716 if (copied >= target && !sk->sk_backlog.tail)
1717 break;
1718
1719 if (copied) {
1720 if (sk->sk_err ||
1721 sk->sk_state == TCP_CLOSE ||
1722 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1723 !timeo ||
1724 signal_pending(current))
1725 break;
1726 } else {
1727 if (sock_flag(sk, SOCK_DONE))
1728 break;
1729
1730 if (sk->sk_err) {
1731 copied = sock_error(sk);
1732 break;
1733 }
1734
1735 if (sk->sk_shutdown & RCV_SHUTDOWN)
1736 break;
1737
1738 if (sk->sk_state == TCP_CLOSE) {
1739 if (!sock_flag(sk, SOCK_DONE)) {
1740 /* This occurs when user tries to read
1741 * from never connected socket.
1742 */
1743 copied = -ENOTCONN;
1744 break;
1745 }
1746 break;
1747 }
1748
1749 if (!timeo) {
1750 copied = -EAGAIN;
1751 break;
1752 }
1753
1754 if (signal_pending(current)) {
1755 copied = sock_intr_errno(timeo);
1756 break;
1757 }
1758 }
1759
1760 tcp_nip_cleanup_rbuf(sk, copied);
1761
1762 if (copied >= target) {
1763 /* Do not sleep, just process backlog. */
1764 release_sock(sk);
1765 lock_sock(sk);
1766 } else {
1767 nip_dbg("no enough data receive queue, wait");
1768 sk_wait_data(sk, &timeo, last);
1769 }
1770 continue;
1771 found_ok_skb:
1772 used = skb->len - offset;
1773 if (len_tmp < used)
1774 used = len_tmp;
1775 nip_dbg("copy data into msg, len=%ld", used);
1776 if (!(flags & MSG_TRUNC)) {
1777 err = skb_copy_datagram_msg(skb, offset, msg, used);
1778 if (err) {
1779 nip_dbg("copy data failed");
1780 if (!copied)
1781 copied = -EFAULT;
1782 break;
1783 }
1784 }
1785 *seq += used;
1786 len_tmp -= used;
1787 copied += used;
1788
1789 if (used + offset < skb->len)
1790 continue;
1791
1792 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1793 goto found_fin_ok;
1794 if (!(flags & MSG_PEEK))
1795 sk_eat_skb(sk, skb);
1796 continue;
1797
1798 found_fin_ok:
1799 /* Process the FIN. */
1800 ++*seq;
1801 if (!(flags & MSG_PEEK))
1802 sk_eat_skb(sk, skb);
1803 break;
1804 } while (len_tmp > 0);
1805
1806 /* Clean up data we have read: This will do ACK frames. */
1807 tcp_nip_cleanup_rbuf(sk, copied);
1808
1809 release_sock(sk);
1810 return copied;
1811
1812 out:
1813 release_sock(sk);
1814 return err;
1815 }
1816
1817 static void skb_nip_rbtree_purge(struct sock *sk)
1818 {
1819 struct tcp_sock *tp = tcp_sk(sk);
1820
1821 skb_rbtree_purge(&tp->out_of_order_queue);
1822 }
1823
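/* Release all per-socket resources: xmit timers, the write queue, the
 * out-of-order queue, the bound port (if any) and any saved SYN.
 */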
1824 void tcp_nip_destroy_sock(struct sock *sk)
1825 {
1826 struct tcp_sock *tp = tcp_sk(sk);
1827
1828 tcp_nip_clear_xmit_timers(sk);
1829
1830 tcp_nip_write_queue_purge(sk);
1831
1832 skb_nip_rbtree_purge(sk);
1833
1834 if (inet_csk(sk)->icsk_bind_hash)
1835 inet_put_port(sk);
1836
1837 tcp_saved_syn_free(tp);
1838 local_bh_disable();
1839 sk_sockets_allocated_dec(sk);
1840 local_bh_enable();
1841 }
1842
1843 /* Function
1844 * Socket handler for the LISTEN and ESTABLISHED states, called by tcp_nip_rcv
1845 * Parameter
1846 * skb: Packet received from the network layer
1847 * sk: The sock instance to be processed
1848 */
1849 static int tcp_nip_do_rcv(struct sock *sk, struct sk_buff *skb)
1850 {
1851 nip_dbg("received newip tcp skb, sk_state=%d", sk->sk_state);
1852
1853 if (sk->sk_state == TCP_ESTABLISHED) {
1854 struct dst_entry *dst;
1855
1856 dst = rcu_dereference_protected(sk->sk_rx_dst,
1857 lockdep_sock_is_held(sk));
1858 if (dst) {
1859 /* Triggered when processing newly received skb after deleting routes */
1860 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1861 !dst->ops->check(dst, 0)) {
1862 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1863 dst_release(dst);
1864 }
1865 }
1866 tcp_nip_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1867 return 0;
1868 }
1869
1870 /* The connection is established in cookie mode to defend against SYN-flood attacks */
1871 if (sk->sk_state == TCP_LISTEN)
1872 nip_dbg("found TCP_LISTEN SOCK");
1873
1874 if (tcp_nip_rcv_state_process(sk, skb))
1875 goto discard;
1876 return 0;
1877
1878 discard:
1879 kfree_skb(skb);
1880 return 0;
1881 }
1882
1883 /* Function:
1884 * Copy the TCP header fields of the skb into the TCP control block.
1885 * The header fields in the skb are in network byte order, so they are
1886 * converted to host byte order here and stored in the control block
1887 * for convenient later use.
1888 * Parameter:
1889 * skb: Packet delivered by the network layer
1890 * th: TCP header of the packet
1891 */
1892 static void tcp_nip_fill_cb(struct sk_buff *skb, const struct tcphdr *th)
1893 {
1894 barrier();
1895
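/* seq/end_seq are converted to host byte order; SYN and FIN each count
 * as one sequence number, and the header length (doff in 32-bit words)
 * is subtracted from skb->len to obtain the payload length.
 */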
1896 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1897 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1898 skb->len - th->doff * TCP_NUM_4);
1899
1900 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1901 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1902 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1903 TCP_SKB_CB(skb)->sacked = 0;
1904 }
1905
1906 static bool tcp_nip_add_backlog(struct sock *sk, struct sk_buff *skb)
1907 {
1908 u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
1909
1910 /* Only socket owner can try to collapse/prune rx queues
1911 * to reduce memory overhead, so add a little headroom here.
1912 * Few sockets backlog are possibly concurrently non empty.
1913 */
1914 limit += TCP_BACKLOG_HEADROOM;
1915
1916 /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1917 * we can fix skb->truesize to its real value to avoid future drops.
1918 * This is valid because skb is not yet charged to the socket.
1919 * It has been noticed pure SACK packets were sometimes dropped
1920 * (if cooked by drivers without copybreak feature).
1921 */
1922 skb_condense(skb);
1923
1924 if (unlikely(sk_add_backlog(sk, skb, limit))) {
1925 bh_unlock_sock(sk);
1926 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1927 nip_dbg("insert backlog fail");
1928 return true;
1929 }
1930 return false;
1931 }
1932
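/* Basic sanity checks on an incoming skb: a pullable TCP header,
 * packet type PACKET_HOST and a valid TCP checksum.
 */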
1933 static int nip_skb_precheck(struct sk_buff *skb)
1934 {
1935 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) {
1936 nip_dbg("invalid tcp packet length, drop the packet(skb->len=%u)", skb->len);
1937 return -EINVAL;
1938 }
1939
1940 if (skb->pkt_type != PACKET_HOST) {
1941 nip_dbg("unknown pkt-type(%u), drop skb", skb->pkt_type);
1942 return -EINVAL;
1943 }
1944
1945 if (!nip_get_tcp_input_checksum(skb)) {
1946 nip_dbg("checksum fail, drop skb");
1947 return -EINVAL;
1948 }
1949
1950 return 0;
1951 }
1952
1953 /* Function
1954 * Entry point from the network layer into the transport layer;
1955 * receives TCP packets handed up by the network layer
1956 * Parameter
1957 * skb: Packet delivered by the network layer
1958 */
1959 static int tcp_nip_rcv(struct sk_buff *skb)
1960 {
1961 const struct tcphdr *th;
1962 bool refcounted;
1963 struct sock *sk = NULL;
1964 int ret;
1965 int dif = skb->skb_iif;
1966
1967 if (nip_skb_precheck(skb))
1968 goto discard_it;
1969
1970 th = (const struct tcphdr *)skb->data;
1971
1972 if (unlikely(th->doff < sizeof(struct tcphdr) / TCP_NUM_4)) {
1973 nip_dbg("non-four byte alignment, drop skb");
1974 goto discard_it;
1975 }
1976 if (!pskb_may_pull(skb, th->doff * 4))
1977 goto discard_it;
1978
1979 sk = __ninet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1980 th->source, th->dest, dif, &refcounted);
1981 if (!sk) {
1982 nip_dbg("can`t find related sock for skb, will disconnect");
1983 goto no_tcp_socket;
1984 }
1985
1986 if (sk->sk_state == TCP_TIME_WAIT) {
1987 /* Handles the SK portion of the interrupt state */
1988 nip_dbg("sk_state is TCP_TIME_WAIT, drop skb");
1989 goto discard_it;
1990 }
1991 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1992 struct request_sock *req = inet_reqsk(sk);
1993 struct sock *nsk;
1994
1995 nip_dbg("TCP server into third shake hands, sk->sk_state:%d", sk->sk_state);
1996 sk = req->rsk_listener;
1997
1998 sock_hold(sk);
1999 refcounted = true;
2000 nsk = NULL;
2001 /* A new sock must be created here; it enters TCP_SYN_RECV and
2002 * is later set to ESTABLISHED
2003 */
2004 if (!tcp_filter(sk, skb)) {
2005 th = (const struct tcphdr *)skb->data;
2006 tcp_nip_fill_cb(skb, th);
2007 nsk = tcp_nip_check_req(sk, skb, req);
2008 }
2009 if (!nsk || nsk == sk) {
2010 nip_dbg("skb info error and create newsk failure, drop skb");
2011 reqsk_put(req);
2012 goto discard_and_relse;
2013 }
2014 if (tcp_nip_child_process(sk, nsk, skb)) {
2015 nip_dbg("child process fail, drop skb");
2016 goto discard_and_relse;
2017 } else {
2018 sock_put(sk);
2019 return 0;
2020 }
2021 }
2022
2023 tcp_nip_fill_cb(skb, th);
2024
2025 if (tcp_filter(sk, skb)) {
2026 nip_dbg("tcp filter fail, drop skb");
2027 goto discard_and_relse;
2028 }
2029 th = (const struct tcphdr *)skb->data;
2030 skb->dev = NULL;
2031
2032 if (sk->sk_state == TCP_LISTEN) {
2033 nip_dbg("TCP server into first shake hands! sk->sk_state:%d", sk->sk_state);
2034 ret = tcp_nip_do_rcv(sk, skb);
2035 goto put_and_return;
2036 }
2037 bh_lock_sock_nested(sk);
2038
2039 ret = 0;
2040 if (!sock_owned_by_user(sk)) {
2041 ret = tcp_nip_do_rcv(sk, skb);
2042 } else {
2043 nip_dbg("sock locked by user, put packet into backlog");
2044 if (tcp_nip_add_backlog(sk, skb)) {
2045 nip_dbg("add backlog fail, drop skb");
2046 goto discard_and_relse;
2047 }
2048 }
2049
2050 bh_unlock_sock(sk);
2051
2052 put_and_return:
2053 if (refcounted)
2054 sock_put(sk);
2055 return ret ? -1 : 0;
2056
2057 no_tcp_socket:
2058 tcp_nip_send_reset(NULL, skb);
2059
2060 discard_it:
2061 kfree_skb(skb);
2062 nip_sock_debug(sk, __func__);
2063 return 0;
2064
2065 discard_and_relse:
2066 sk_drops_add(sk, skb);
2067 nip_sock_debug(sk, __func__);
2068 if (refcounted)
2069 sock_put(sk);
2070 goto discard_it;
2071 }
2072
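/* Early demux: look up an already established NewIP socket for this skb
 * and, if one is found, attach it (and its cached rx dst, when still
 * valid) to the skb so the later receive path can skip the full lookup.
 */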
2073 static void tcp_nip_early_demux(struct sk_buff *skb)
2074 {
2075 const struct tcphdr *th;
2076 struct sock *sk;
2077
2078 if (skb->pkt_type != PACKET_HOST)
2079 return;
2080
2081 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2082 return;
2083
2084 th = tcp_hdr(skb);
2085 if (th->doff < sizeof(struct tcphdr) / BYTES_PER_TCP_HEADER)
2086 return;
2087
2088 sk = __ninet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
2089 &nipcb(skb)->srcaddr, th->source,
2090 &nipcb(skb)->dstaddr, ntohs(th->dest), skb->skb_iif);
2091 if (sk) {
2092 skb->sk = sk;
2093 skb->destructor = sock_edemux;
2094 if (sk_fullsock(sk)) {
2095 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2096
2097 if (dst)
2098 dst = dst_check(dst, 0);
2099 if (dst && inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) {
2100 nip_dbg("find sock in ehash, set dst for skb");
2101 skb_dst_set_noref(skb, dst);
2102 }
2103 }
2104 }
2105 }
2106
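/* Final teardown: move the socket to TCP_CLOSE, stop the timers and,
 * for a socket already marked SOCK_DEAD, destroy its queues and drop
 * the last reference.
 */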
2107 void tcp_nip_done(struct sock *sk)
2108 {
2109 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
2110
2111 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
2112 TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
2113
2114 tcp_set_state(sk, TCP_CLOSE);
2115 inet_csk_clear_xmit_timers(sk);
2116 if (req)
2117 reqsk_fastopen_remove(sk, req, false);
2118
2119 sk->sk_shutdown = SHUTDOWN_MASK;
2120
2121 if (!sock_flag(sk, SOCK_DEAD)) {
2122 sk->sk_state_change(sk);
2123 } else {
2124 WARN_ON(sk->sk_state != TCP_CLOSE);
2125 WARN_ON(!sock_flag(sk, SOCK_DEAD));
2126
2127 /* It cannot be in hash table! */
2128 WARN_ON(!sk_unhashed(sk));
2129
2130 /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
2131 WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
2132 sk->sk_prot->destroy(sk);
2133
2134 sk_nip_stream_kill_queues(sk);
2135
2136 local_bh_disable();
2137 this_cpu_dec(*sk->sk_prot->orphan_count);
2138 local_bh_enable();
2139 sock_put(sk);
2140 nip_dbg("close sock done");
2141 }
2142 }
2143
2144 /* Function
2145 * Disconnect from the peer, non-blocking
2146 * Purge the read/write queues, send an RST when required and clear the timers
2147 * Parameter
2148 * sk: Transmission control block
2149 */
2150 int tcp_nip_disconnect(struct sock *sk, int flags)
2151 {
2152 struct inet_sock *inet = inet_sk(sk);
2153 struct inet_connection_sock *icsk = inet_csk(sk);
2154 struct tcp_sock *tp = tcp_sk(sk);
2155 int err = 0;
2156 int old_state = sk->sk_state;
2157 u32 sk_ack_backlog;
2158
2159 nip_dbg("old_state=%u", old_state);
2160 if (old_state != TCP_CLOSE)
2161 tcp_set_state(sk, TCP_CLOSE);
2162
2163 if (old_state == TCP_LISTEN) {
2164 sk_ack_backlog = READ_ONCE(sk->sk_ack_backlog);
2165 inet_csk_listen_stop(sk);
2166 nip_dbg("sk_state CLOSE, sk_ack_backlog=%u to %u, sk_max_ack_backlog=%u",
2167 sk_ack_backlog, READ_ONCE(sk->sk_ack_backlog),
2168 READ_ONCE(sk->sk_max_ack_backlog));
2169 } else if (tcp_nip_need_reset(old_state) || (tp->snd_nxt != tp->write_seq &&
2170 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2171 tcp_nip_send_active_reset(sk, gfp_any());
2172 sk->sk_err = ECONNRESET;
2173 } else if (old_state == TCP_SYN_SENT) {
2174 sk->sk_err = ECONNRESET;
2175 }
2176
2177 tcp_nip_clear_xmit_timers(sk);
2178 __skb_queue_purge(&sk->sk_receive_queue);
2179 tcp_write_queue_purge(sk);
2180
2181 _tcp_sock_priv_init(sk);
2182
2183 inet->inet_dport = 0;
2184 sk->sk_shutdown = 0;
2185 sock_reset_flag(sk, SOCK_DONE);
2186 tp->srtt_us = 0;
2187 tp->write_seq += tp->max_window + TCP_NUM_2;
2188 if (tp->write_seq == 0)
2189 tp->write_seq = 1;
2190 tp->snd_cwnd = TCP_NUM_2;
2191 icsk->icsk_backoff = 0;
2192 icsk->icsk_probes_out = 0;
2193 icsk->icsk_probes_tstamp = 0;
2194 icsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto());
2195 icsk->icsk_rto_min = TCP_RTO_MIN;
2196 icsk->icsk_delack_max = TCP_DELACK_MAX;
2197 tp->packets_out = 0;
2198 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2199 tp->snd_cwnd_cnt = 0;
2200 tp->window_clamp = 0;
2201 tp->delivered = 0;
2202 tcp_clear_retrans(tp);
2203 tp->total_retrans = 0;
2204 inet_csk_delack_init(sk);
2205
2206 icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
2207 sk->sk_send_head = NULL;
2208 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2209 __sk_dst_reset(sk);
2210 dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
2211 tp->segs_in = 0;
2212 tp->segs_out = 0;
2213 tp->bytes_acked = 0;
2214 tp->bytes_received = 0;
2215 tp->data_segs_in = 0;
2216 tp->data_segs_out = 0;
2217
2218 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2219
2220 if (sk->sk_frag.page) {
2221 put_page(sk->sk_frag.page);
2222 sk->sk_frag.page = NULL;
2223 sk->sk_frag.offset = 0;
2224 }
2225
2226 sk->sk_error_report(sk);
2227 return err;
2228 }
2229
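/* Thin wrapper around inet_csk_accept() that additionally logs the
 * accept-queue state for debugging.
 */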
2230 struct sock *ninet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
2231 {
2232 struct sock *newsk;
2233 u32 sk_ack_backlog_last = READ_ONCE(sk->sk_ack_backlog);
2234 u32 sk_max_ack_backlog = READ_ONCE(sk->sk_max_ack_backlog);
2235
2236 newsk = inet_csk_accept(sk, flags, err, kern);
2237 nip_dbg("accept %s, sk_ack_backlog_last=%u, sk_max_ack_backlog=%u, err=%d",
2238 (newsk ? "ok" : "fail"), sk_ack_backlog_last, sk_max_ack_backlog,
2239 *err);
2240
2241 return newsk;
2242 }
2243
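/* sendpage and pre_connect are not supported for NewIP TCP; both stubs
 * simply return -EINVAL.
 */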
2244 static int tcp_nip_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
2245 int flags)
2246 {
2247 return -EINVAL;
2248 }
2249
2250 static int tcp_nip_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
2251 {
2252 return -EINVAL;
2253 }
2254
2255 struct proto tcp_nip_prot = {
2256 .name = "NIP_TCP",
2257 .owner = THIS_MODULE,
2258 .close = tcp_nip_close,
2259 .pre_connect = tcp_nip_pre_connect,
2260 .connect = tcp_nip_connect,
2261 .disconnect = tcp_nip_disconnect,
2262 .accept = ninet_csk_accept,
2263 .ioctl = tcp_ioctl,
2264 .init = tcp_nip_init_sock,
2265 .destroy = tcp_nip_destroy_sock,
2266 .shutdown = tcp_nip_shutdown,
2267 .setsockopt = tcp_setsockopt,
2268 .getsockopt = tcp_getsockopt,
2269 .keepalive = tcp_set_keepalive,
2270 .recvmsg = tcp_nip_recvmsg,
2271 .sendmsg = tcp_nip_sendmsg,
2272 .sendpage = tcp_nip_sendpage,
2273 .backlog_rcv = tcp_nip_do_rcv,
2274 .release_cb = tcp_nip_release_cb,
2275 .hash = ninet_hash,
2276 .unhash = ninet_unhash,
2277 .get_port = inet_csk_get_port,
2278 .sockets_allocated = &tcp_sockets_allocated,
2279 .orphan_count = &tcp_orphan_count,
2280 .memory_allocated = &tcp_memory_allocated,
2281 .memory_pressure = &tcp_memory_pressure,
2282 .sysctl_mem = sysctl_tcp_mem,
2283 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2284 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2285 .max_header = MAX_TCP_HEADER,
2286 .obj_size = sizeof(struct tcp_nip_sock),
2287 .rsk_prot = &tcp_nip_request_sock_ops,
2288 .h.hashinfo = &tcp_hashinfo,
2289 .no_autobind = true,
2290 };
2291
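/* The error handler is currently a no-op for NewIP TCP. */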
2292 static void tcp_nip_err_handler(struct sk_buff *skb,
2293 struct ninet_skb_parm *opt,
2294 u8 type, u8 code, int offset, __be32 info)
2295 {
2296 }
2297
2298 static const struct ninet_protocol tcp_nip_protocol = {
2299 .early_demux = tcp_nip_early_demux,
2300 .handler = tcp_nip_rcv,
2301 .err_handler = tcp_nip_err_handler,
2302 .flags = 0,
2303 };
2304
2305 static struct inet_protosw tcp_nip_protosw = {
2306 .type = SOCK_STREAM,
2307 .protocol = IPPROTO_TCP,
2308 .prot = &tcp_nip_prot,
2309 .ops = &ninet_stream_ops,
2310 .flags = INET_PROTOSW_PERMANENT |
2311 INET_PROTOSW_ICSK,
2312 };
2313
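/* Register the NewIP TCP protocol handler and then the protosw entry;
 * if protosw registration fails, the protocol handler is removed again.
 */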
2314 int __init tcp_nip_init(void)
2315 {
2316 int ret;
2317
2318 ret = ninet_add_protocol(&tcp_nip_protocol, IPPROTO_TCP);
2319 if (ret)
2320 goto out;
2321
2322 /* register ninet protocol */
2323 ret = ninet_register_protosw(&tcp_nip_protosw);
2324 if (ret)
2325 goto out_nip_tcp_protocol;
2326
2327 out:
2328 return ret;
2329
2330 out_nip_tcp_protocol:
2331 ninet_del_protocol(&tcp_nip_protocol, IPPROTO_TCP);
2332 goto out;
2333 }
2334
2335 /* When adding the __exit tag to a function, it is important to
2336 * ensure that the function is only called during the exit phase
2337 * to avoid unnecessary warnings and errors.
2338 */
2339 void tcp_nip_exit(void)
2340 {
2341 ninet_unregister_protosw(&tcp_nip_protosw);
2342 ninet_del_protocol(&tcp_nip_protocol, IPPROTO_TCP);
2343 }
2344