// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on net/ipv4/tcp_timer.c
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * NewIP INET
 * An implementation of the TCP/IP protocol suite for the LINUX
 * operating system. NewIP INET is implemented using the BSD Socket
 * interface as the means of communication with the user level.
 *
 * Implementation of the Transmission Control Protocol (TCP).
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__

#include <net/tcp_nip.h>
#include <linux/module.h>
#include "tcp_nip_parameter.h"

#define TCP_ORPHAN_RETRIES 8
/**
 * tcp_nip_orphan_retries() - Returns maximal number of retries on an orphaned socket
 * @sk: Pointer to the current socket.
 * @alive: bool, socket alive state
 */
static int tcp_nip_orphan_retries(struct sock *sk, bool alive)
{
	int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (sk->sk_err_soft && !alive)
		retries = 0;

	/* However, if socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with minimal
	 * RTO of 200msec.
	 */
	if (retries == 0 && alive)
		retries = TCP_ORPHAN_RETRIES;
	return retries;
}

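/* Delayed-ACK timer work. Nothing is done for closed/listening sockets or
 * when no delayed ACK is pending; if the timeout lies in the future the
 * delack timer is simply re-armed. Otherwise any scheduled ACK is sent
 * immediately with ato reset to TCP_ATO_MIN. Runs from the timer callback
 * below, or deferred to tcp_release_cb() when the socket was owned by user.
 */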
void tcp_nip_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;

	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		goto out;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		icsk->icsk_ack.ato = TCP_ATO_MIN;
		tcp_mstamp_refresh(tcp_sk(sk));
		tcp_nip_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}

out:;
}

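/* Abort the connection on a fatal timeout: report the pending soft error
 * (or ETIMEDOUT), release all NewIP TCP resources via tcp_nip_done() and
 * bump LINUX_MIB_TCPABORTONTIMEOUT.
 */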
static void tcp_nip_write_err(struct sock *sk)
{
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);
	/* Releasing TCP Resources */
	tcp_nip_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}

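/* Delayed-ACK timer callback. Handles the timeout directly when the socket
 * is not owned by user; otherwise defers the work to tcp_release_cb() by
 * setting TCP_NIP_DELACK_TIMER_DEFERRED.
 */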
static void tcp_nip_delack_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_delack_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_nip_delack_timer_handler(sk);
	} else {
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_NIP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

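/* @timeout and @syn_set are kept for interface parity with the IPv4 helper
 * but are unused here: NewIP only compares the retransmission count against
 * @boundary (see the comment inside).
 */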
static bool retransmits_nip_timed_out(struct sock *sk,
				      unsigned int boundary,
				      unsigned int timeout,
				      bool syn_set)
{
	/* NewIP does not support computing the timeout from timestamps.
	 * Currently the timeout decision is based only on the number of
	 * retransmissions.
	 */
	nip_dbg("icsk->retransmits=%u, boundary=%u",
		inet_csk(sk)->icsk_retransmits, boundary);
	return inet_csk(sk)->icsk_retransmits > boundary;
}

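/* Decide whether the retransmission timer has finally expired. The retry
 * limit comes from sysctl_tcp_syn_retries while connecting, from
 * sysctl_tcp_retries2 otherwise, and from tcp_nip_orphan_retries() for
 * orphaned (SOCK_DEAD) sockets. Returns 1 after aborting the connection
 * with tcp_nip_write_err(), 0 if retransmission may continue.
 */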
static int tcp_nip_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);
	int retry_until;
	bool syn_set = false;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
		syn_set = true;
	} else {
		retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
		if (sock_flag(sk, SOCK_DEAD)) {
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			/* In the case of SOCK_DEAD, the retry_until value is smaller */
			retry_until = tcp_nip_orphan_retries(sk, alive);
		}
	}

	if (retransmits_nip_timed_out(sk, retry_until,
				      syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
		nip_dbg("tcp retransmit time out");
		tcp_nip_write_err(sk);
		return 1;
	}
	return 0;
}

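/* Retransmission timer expiry. Retransmits the head of the write queue,
 * backs off icsk_rto exponentially (or linearly for thin streams, see the
 * comment below), lowers nip_ssthresh to get_ssthresh_low() and re-arms the
 * ICSK_TIME_RETRANS timer. If the retransmission attempt itself fails, the
 * timer is instead re-armed after min(icsk_rto, TCP_RESOURCE_PROBE_INTERVAL).
 */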
void tcp_nip_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb = tcp_write_queue_head(sk);
	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
	struct net *net = sock_net(sk);
	u32 icsk_rto_last;

	if (!tp->packets_out)
		return;

	if (tcp_nip_write_queue_empty(sk))
		return;

	tp->tlp_high_seq = 0;

	if (tcp_nip_write_timeout(sk))
		return;

	if (tcp_nip_retransmit_skb(sk, skb, 1) > 0) {
		if (!icsk->icsk_retransmits)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);

		nip_dbg("seq %u retransmit fail, win=%u, rto=%u, pkt_out=%u, icsk_backoff=%u",
			scb->seq, ntp->nip_ssthresh,
			icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff);
		return;
	}

	if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
		icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

	icsk_rto_last = icsk->icsk_rto;
	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
	 * might be increased if the stream oscillates between thin and thick,
	 * thus the old value might already be too high compared to the value
	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
	 * exponential backoff behaviour, to avoid continuing to hammer
	 * linear-timeout retransmissions into a black hole.
	 */
	if (sk->sk_state == TCP_ESTABLISHED &&
	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
	    tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else {
		/* Use normal (exponential) backoff */
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}

	nip_dbg("seq %u, win[%u-%u] rto[%u-%u] pkt_out=%u, icsk_backoff=%u, retransmits=%u",
		scb->seq, ntp->nip_ssthresh, get_ssthresh_low(),
		icsk_rto_last, icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff,
		icsk->icsk_retransmits);

	ntp->nip_ssthresh = get_ssthresh_low();

	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
}

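/* Zero-window probe (probe0) timer expiry. The probe state is cleared when
 * packets are in flight or there is nothing queued to send. Otherwise the
 * probe counter is checked against the limit (get_nip_probe_max(), or
 * tcp_nip_orphan_retries() for dead sockets): past the limit the connection
 * is aborted, else another probe is sent via tcp_nip_send_probe0().
 */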
void tcp_nip_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;
	int icsk_backoff;
	int icsk_probes_out;

	if (tp->packets_out || !tcp_nip_send_head(sk)) {
		icsk->icsk_probes_out = 0;
		icsk->icsk_probes_tstamp = 0;
		icsk->icsk_backoff = 0; /* the IPv4 version does not reset icsk_backoff here */
		nip_dbg("packets_out(%u) not 0 or send_head is NULL, cancel probe0 timer",
			tp->packets_out);
		return;
	}

	/* default: sock_net(sk)->ipv4.sysctl_tcp_retries2 */
	max_probes = get_nip_probe_max(); /* fix session auto close */

	if (sock_flag(sk, SOCK_DEAD)) {
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_nip_orphan_retries(sk, alive);
		nip_dbg("sock dead, icsk_backoff=%u, max_probes=%u, alive=%u",
			icsk->icsk_backoff, max_probes, alive);
		if (!alive && icsk->icsk_backoff >= max_probes) {
			nip_dbg("will close session, icsk_backoff=%u, max_probes=%u",
				icsk->icsk_backoff, max_probes);
			goto abort;
		}
	}

	if (icsk->icsk_probes_out >= max_probes) {
abort:		icsk_backoff = icsk->icsk_backoff;
		icsk_probes_out = icsk->icsk_probes_out;
		nip_dbg("close session, probes_out=%u, icsk_backoff=%u, max_probes=%u",
			icsk_probes_out, icsk_backoff, max_probes);
		tcp_nip_write_err(sk);
	} else {
		icsk_backoff = icsk->icsk_backoff;
		icsk_probes_out = icsk->icsk_probes_out;
		nip_dbg("will send probe0, probes_out=%u, icsk_backoff=%u, max_probes=%u",
			icsk_probes_out, icsk_backoff, max_probes);
		/* Only send another probe if we didn't close things up. */
		tcp_nip_send_probe0(sk);
	}
}

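/* Common write-timer work: when the pending icsk event is due, dispatch it
 * to the retransmit or probe0 handler; if it is not due yet, simply re-arm
 * icsk_retransmit_timer for the remaining time.
 */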
void tcp_nip_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending)
		return;

	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		return;
	}
	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	switch (event) {
	case ICSK_TIME_RETRANS:
		icsk->icsk_pending = 0;
		tcp_nip_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_nip_probe_timer(sk);
		break;
	default:
		break;
	}
}

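/* Retransmit/probe0 timer callback. Runs the handler directly when the
 * socket is not owned by user; otherwise defers the work to tcp_release_cb()
 * by setting TCP_NIP_WRITE_TIMER_DEFERRED.
 */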
static void tcp_nip_write_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_retransmit_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_nip_write_timer_handler(sk);
	} else {
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_NIP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

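/* Check whether keepalive probing has run out. Only applies when a
 * keepalive time larger than HZ was configured via setsockopt. The limit is
 * TCP_USER_TIMEOUT when set, otherwise keepalive_probes(tp); on expiry the
 * connection is aborted with tcp_nip_write_err() and true is returned.
 */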
static bool tcp_nip_keepalive_is_timeout(struct sock *sk, u32 elapsed)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	u32 keepalive_time = keepalive_time_when(tp);
	bool is_timeout = false;

	/* keepalive set by setsockopt */
	if (keepalive_time > HZ) {
		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((icsk->icsk_user_timeout != 0 &&
		     elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
		     ntp->nip_keepalive_out > 0) ||
		    (icsk->icsk_user_timeout == 0 &&
		     ntp->nip_keepalive_out >= keepalive_probes(tp))) {
			nip_dbg("normal keepalive timeout, keepalive_out=%u",
				ntp->nip_keepalive_out);
			tcp_nip_write_err(sk);
			is_timeout = true;
		}
	}

	return is_timeout;
}

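/* Keepalive timer callback. Also used to tear down FIN_WAIT2/CLOSING
 * sockets (NewIP has no TIME_WAIT state, see the comment below). When the
 * idle time exceeds keepalive_time_when(tp) a keepalive probe is sent via
 * tcp_nip_write_wakeup() and the timer is re-armed for the keepalive
 * interval; otherwise it is re-armed for the remaining idle time.
 */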
static void tcp_nip_keepalive_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer(sk, HZ / TCP_NIP_KEEPALIVE_CYCLE_MS_DIVISOR);
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		nip_dbg("keepalive on a LISTEN");
		goto out;
	}
	tcp_mstamp_refresh(tp);
	/* 2022-02-18
	 * NewIP TCP doesn't have a TIME_WAIT state, so a socket in TCP_CLOSING
	 * uses the keepalive timer to release itself.
	 */
	if ((sk->sk_state == TCP_FIN_WAIT2 || sk->sk_state == TCP_CLOSING) &&
	    sock_flag(sk, SOCK_DEAD)) {
		nip_dbg("finish wait, close sock, sk_state=%u", sk->sk_state);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	elapsed = keepalive_time_elapsed(tp);
	if (elapsed >= keepalive_time_when(tp)) {
		if (tcp_nip_keepalive_is_timeout(sk, elapsed))
			goto out;

		if (tcp_nip_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			ntp->nip_keepalive_out++;
			ntp->idle_ka_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	sk_mem_reclaim(sk);

resched:
	inet_csk_reset_keepalive_timer(sk, elapsed);
	goto out;

death:
	tcp_nip_done(sk);

out:
	tcp_nip_keepalive_disable(sk);
	bh_unlock_sock(sk);
	sock_put(sk);
}

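/* Install the NewIP write, delayed-ACK and keepalive timer callbacks on the
 * socket; tcp_nip_clear_xmit_timers() simply undoes this through the common
 * inet_csk helper.
 */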
void tcp_nip_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &tcp_nip_write_timer, &tcp_nip_delack_timer,
				  &tcp_nip_keepalive_timer);
}

void tcp_nip_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}