1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
30 */
31
32 #include <string.h>
33
34 #include "tcp.h"
35 #include "tcp_fsm.h"
36 #include "tcp_seq.h"
37 #include "tcp_timer.h"
38 #include "tcp_var.h"
39
40 #include "tcp_const.h"
41 #include <openthread/ip6.h>
42 #include <openthread/message.h>
43
44 /*
45 * samkumar: The V_nolocaltimewait variable corresponds to the
46 * net.inet.tcp.nolocaltimewait option in FreeBSD. When set to 1, it skips the
47 * TIME-WAIT state for TCP connections where both endpoints are local IP
48 * addresses, to save resources on HTTP accelerators, database servers/clients,
49 * etc. In TCPlp, I eliminated support for this feature, but I have kept the
50 * code for it, commented out with "#if 0", in case we choose to bring it back
51 * at a later time.
52 *
53 * See also the "#if 0" block in tcp_twstart.
54 */
55 #if 0
56 enum tcp_timewait_consts {
57 V_nolocaltimewait = 0
58 };
59 #endif
60
61 /*
62 * samkumar: The FreeBSD code used a separate, smaller structure, called
63 * struct tcptw, to represent connections in the TIME-WAIT state. In TCPlp,
64 * we use the full struct tcpcb structure even in the TIME-WAIT state. This
65 * consumes more memory, but switching to a different structure like
66 * struct tcptw to save memory would be difficult because the host system or
67 * application has allocated these structures; we can't simply "free" the
68 * struct tcpcb. It would have to have been done via a callback or something,
69 * and in the common case of statically allocated sockets, this would actually
70 * result in more memory (since an application would need to allocate both the
71 * struct tcpcb and the struct tcptw, if it uses a static allocation approach).
72 *
73 * Below, I've changed the function signatures to accept "struct tcpcb* tp"
74 * instead of "struct tcptw *tw" and I have reimplemented the functions
75 * to work using tp (of type struct tcpcb) instead of tw (of type
76 * struct tcptw).
77 *
78 * Conceptually, the biggest change is in how timers are handled. The FreeBSD
79 * code had a 2MSL timer, which was set for sockets that enter certain
80 * "closing" states of the TCP state machine. But when the TIME-WAIT state was
81 * entered, the state is transferred from struct tcpcb into struct tcptw.
82 * The final timeout is handled as follows; the function tcp_tw_2msl_scan is
83 * called periodically on the slow timer, and it iterates over a linked list
84 * of all the struct tcptw and checks the tw->tw_time field to identify which
85 * TIME-WAIT sockets have expired.
86 *
87 * In our switch to using struct tcpcb even in the TIME-WAIT state, we rely on
88 * the timer system for struct tcpcb. I modified the 2msl callback in
89 * tcp_timer.c to check for the TIME-WAIT case and handle it correctly.
90 */
91
92 static void
tcp_tw_2msl_reset(struct tcpcb * tp,int rearm)93 tcp_tw_2msl_reset(struct tcpcb* tp, int rearm)
94 {
95 /*
96 * samkumar: This function used to set tw->tw_time to ticks + 2 * tcp_msl
97 * and insert tw into the linked list V_twq_2msl. I've replaced this, along
98 * with the associated locking logic, with the following call, which uses
99 * the timer system in place for full TCBs.
100 */
101 tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl);
102 }
103
104 /*
105 * samkumar: I've rewritten this code since I need to send out packets via the
106 * host system for TCPlp: allocating buffers from the host system, populating
107 * them, and then passing them back to the host system. I simplified the code by
108 * only using the logic that was fully necessary, eliminating the code for IPv4
109 * packets and keeping only the code for IPv6 packets. I also removed all of
110 * the mbuf logic, instead using the logic for using the host system's
111 * buffering.
112 *
113 * This rewritten code always returns 0. The original code would return
114 * whatever is returned by ip_output or ip6_output (FreeBSD's functions for
115 * sending out IP packets). I believe 0 indicates success, and a nonzero
116 * value represents an error code. It seems that the return value of
117 * tcp_twrespond is ignored by all instances of its use in TCPlp (maybe even
118 * in all of FreeBSD), so this is a moot point.
119 */
120 static int
tcp_twrespond(struct tcpcb * tp,int flags)121 tcp_twrespond(struct tcpcb* tp, int flags)
122 {
123 struct tcphdr* nth;
124 struct tcpopt to;
125 uint32_t optlen = 0;
126 uint8_t opt[TCP_MAXOLEN];
127
128 to.to_flags = 0;
129
130 /*
131 * Send a timestamp and echo-reply if both our side and our peer
132 * have sent timestamps in our SYN's and this is not a RST.
133 */
134 if ((tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) {
135 to.to_flags |= TOF_TS;
136 to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
137 to.to_tsecr = tp->ts_recent;
138 }
139 optlen = tcp_addoptions(&to, opt);
140
141 otMessage* message = tcplp_sys_new_message(tp->instance);
142 if (message == NULL) {
143 return 0; // drop the message
144 }
145 if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen) != OT_ERROR_NONE) {
146 tcplp_sys_free_message(tp->instance, message);
147 return 0; // drop the message
148 }
149
150 char outbuf[sizeof(struct tcphdr) + optlen];
151 nth = (struct tcphdr*) &outbuf[0];
152 otMessageInfo ip6info;
153 memset(&ip6info, 0x00, sizeof(ip6info));
154
155 memcpy(&ip6info.mSockAddr, &tp->laddr, sizeof(ip6info.mSockAddr));
156 memcpy(&ip6info.mPeerAddr, &tp->faddr, sizeof(ip6info.mPeerAddr));
157 nth->th_sport = tp->lport;
158 nth->th_dport = tp->fport;
159 nth->th_seq = htonl(tp->snd_nxt);
160 nth->th_ack = htonl(tp->rcv_nxt);
161 nth->th_off_x2 = ((sizeof(struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT;
162 nth->th_flags = flags;
163 nth->th_win = htons(tp->tw_last_win);
164 nth->th_urp = 0;
165 nth->th_sum = 0;
166
167 memcpy(nth + 1, opt, optlen);
168 otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen);
169 tcplp_sys_send_message(tp->instance, message, &ip6info);
170
171 return 0;
172 }
173
174 /*
175 * Move a TCP connection into TIME_WAIT state.
176 * tcbinfo is locked.
177 * inp is locked, and is unlocked before returning.
178 */
179 /*
180 * samkumar: Locking is removed (so above comments regarding locks are no
181 * longer relevant for TCPlp). Rather than allocating a struct tcptw and
182 * discarding the struct tcpcb, this function just switches the tcpcb state
183 * to correspond to TIME-WAIT (updating variables as appropriate). We also
184 * eliminate the "V_nolocaltimewait" optimization.
185 */
186 void
tcp_twstart(struct tcpcb * tp)187 tcp_twstart(struct tcpcb *tp)
188 {
189 int acknow;
190
191 /*
192 * samkumar: The following code, commented out using "#if 0", handles the
193 * net.inet.tcp.nolocaltimewait option in FreeBSD. The option skips the
194 * TIME-WAIT state for TCP connections where both endpoints are local.
195 * I'm removing this optimization for TCPlp, but I've left the code
196 * commented out as it's a potentially useful feature that we may choose
197 * to restore later.
198 *
199 * See also the "#if 0" block near the top of this file.
200 */
201 #if 0
202 if (V_nolocaltimewait) {
203 int error = 0;
204 #ifdef INET6
205 if (isipv6)
206 error = in6_localaddr(&inp->in6p_faddr);
207 #endif
208 #if defined(INET6) && defined(INET)
209 else
210 #endif
211 #ifdef INET
212 error = in_localip(inp->inp_faddr);
213 #endif
214 if (error) {
215 tp = tcp_close(tp);
216 if (tp != NULL)
217 INP_WUNLOCK(inp);
218 return;
219 }
220 }
221 #endif
222
223 /*
224 * For use only by DTrace. We do not reference the state
225 * after this point so modifying it in place is not a problem.
226 */
227 /*
228 * samkumar: The above comment is not true anymore. I use this state, since
229 * I don't associate every struct tcpcb with a struct inpcb.
230 */
231 tcp_state_change(tp, TCPS_TIME_WAIT);
232
233 /*
234 * samkumar: There used to be code here to allocate a struct tcptw
235 * using "tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);" and if it fails, close
236 * an existing TIME-WAIT connection, in LRU fashion, to allocate memory.
237 */
238
239 /*
240 * Recover last window size sent.
241 */
242 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
243 tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
244 else
245 tp->tw_last_win = 0;
246
247 /*
248 * Set t_recent if timestamps are used on the connection.
249 */
250 if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
251 (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
252 /*
253 * samkumar: This used to do:
254 * tw->t_recent = tp->ts_recent;
255 * tw->ts_offset = tp->ts_offset;
256 * But since we're keeping the state in tp, we don't need to do this
257 * anymore. */
258 } else {
259 tp->ts_recent = 0;
260 tp->ts_offset = 0;
261 }
262
263 /*
264 * samkumar: There used to be code here to populate various fields in
265 * tw based on their values in tp, but there's no need for that now since
266 * we can just read the values from tp. tw->tw_time was set to 0, but we
267 * don't need to do that either since we're relying on the old timer system
268 * anyway.
269 */
270
271 /* XXX
272 * If this code will
273 * be used for fin-wait-2 state also, then we may need
274 * a ts_recent from the last segment.
275 */
276 acknow = tp->t_flags & TF_ACKNOW;
277
278 /*
279 * First, discard tcpcb state, which includes stopping its timers and
280 * freeing it. tcp_discardcb() used to also release the inpcb, but
281 * that work is now done in the caller.
282 *
283 * Note: soisdisconnected() call used to be made in tcp_discardcb(),
284 * and might not be needed here any longer.
285 */
286 /*
287 * samkumar: Below, I removed the code to discard tp, update inpcb and
288 * release a reference to socket, but kept the rest. I also added a call
289 * to cancel any pending timers on the TCB (which discarding it, as the
290 * original code did, would have done).
291 */
292 tcp_cancel_timers(tp);
293 if (acknow)
294 tcp_twrespond(tp, TH_ACK);
295 tcp_tw_2msl_reset(tp, 0);
296 }
297
298 /*
299 * Returns 1 if the TIME_WAIT state was killed and we should start over,
300 * looking for a pcb in the listen state. Returns 0 otherwise.
301 */
302 /*
303 * samkumar: Old signature was
304 * int
305 * tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
306 * struct mbuf *m, int tlen)
307 */
308 int
tcp_twcheck(struct tcpcb * tp,struct tcphdr * th,int tlen)309 tcp_twcheck(struct tcpcb* tp, struct tcphdr *th, int tlen)
310 {
311 int thflags;
312 tcp_seq seq;
313
314 /*
315 * samkumar: There used to be code here that obtains the struct tcptw from
316 * the inpcb, and does "goto drop" if that fails.
317 */
318
319 thflags = th->th_flags;
320
321 /*
322 * NOTE: for FIN_WAIT_2 (to be added later),
323 * must validate sequence number before accepting RST
324 */
325
326 /*
327 * If the segment contains RST:
328 * Drop the segment - see Stevens, vol. 2, p. 964 and
329 * RFC 1337.
330 */
331 if (thflags & TH_RST)
332 goto drop;
333
334 /*
335 * samkumar: This was commented out (using #if 0) in the original FreeBSD
336 * code.
337 */
338 #if 0
339 /* PAWS not needed at the moment */
340 /*
341 * RFC 1323 PAWS: If we have a timestamp reply on this segment
342 * and it's less than ts_recent, drop it.
343 */
344 if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
345 TSTMP_LT(to.to_tsval, tp->ts_recent)) {
346 if ((thflags & TH_ACK) == 0)
347 goto drop;
348 goto ack;
349 }
350 /*
351 * ts_recent is never updated because we never accept new segments.
352 */
353 #endif
354
355 /*
356 * If a new connection request is received
357 * while in TIME_WAIT, drop the old connection
358 * and start over if the sequence numbers
359 * are above the previous ones.
360 */
361 if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tp->rcv_nxt)) {
362 /*
363 * samkumar: The FreeBSD code would call tcp_twclose(tw, 0); but we
364 * do it as below since TCPlp represents TIME-WAIT connects as
365 * struct tcpcb's.
366 */
367 tcp_close(tp);
368 tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL);
369 return (1);
370 }
371
372 /*
373 * Drop the segment if it does not contain an ACK.
374 */
375 if ((thflags & TH_ACK) == 0)
376 goto drop;
377
378 /*
379 * Reset the 2MSL timer if this is a duplicate FIN.
380 */
381 if (thflags & TH_FIN) {
382 seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
383 if (seq + 1 == tp->rcv_nxt)
384 tcp_tw_2msl_reset(tp, 1);
385 }
386
387 /*
388 * Acknowledge the segment if it has data or is not a duplicate ACK.
389 */
390 if (thflags != TH_ACK || tlen != 0 ||
391 th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt)
392 tcp_twrespond(tp, TH_ACK);
393 drop:
394 return (0);
395 }
396