1 /* MIT License
2 *
3 * Copyright (c) 2024 Brad House
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * SPDX-License-Identifier: MIT
25 */
26
27 /* DNS cookies are a simple form of learned mutual authentication supported by
28 * most DNS server implementations these days and can help prevent DNS Cache
29 * Poisoning attacks for clients and DNS amplification attacks for servers.
30 *
31 * A good overview is here:
32 * https://www.dotmagazine.online/issues/digital-responsibility-and-sustainability/dns-cookies-transaction-mechanism
33 *
34 * RFCs used for implementation are
35 * [RFC7873](https://datatracker.ietf.org/doc/html/rfc7873) which is extended by
36 * [RFC9018](https://datatracker.ietf.org/doc/html/rfc9018).
37 *
38 * Though this could be used on TCP, the likelihood of it being useful is small
39 * and could cause some issues. TCP is better used as a fallback in case there
40 * are issues with DNS Cookie support in the upstream servers (e.g. AnyCast
41 * cluster issues).
42 *
43 * While most recursive DNS servers support DNS Cookies, public DNS servers like
44 * Google (8.8.8.8, 8.8.4.4) and CloudFlare (1.1.1.1, 1.0.0.1) don't seem to
45 * have this enabled yet for unknown reasons.
46 *
47 * The risk to having DNS Cookie support always enabled is nearly zero as there
48 * is built-in detection support and it will simply bypass using cookies if the
49 * remote server doesn't support it. The problem arises if a remote server
50 * supports DNS cookies, then stops supporting them (such as if an administrator
51 * reconfigured the server, or maybe there are different servers in a cluster
52 * with different configurations). We need to detect this behavior by tracking
53 * how much time has gone by since we received our last valid cookie reply, and
54 * if we exceed the threshold, reset all cookie parameters like we haven't
55 * attempted a request yet.
56 *
57 * ## Implementation Plan
58 *
59 * ### Constants:
60 * - `COOKIE_CLIENT_TIMEOUT`: 86400s (1 day)
61 * - How often to regenerate the per-server client cookie, even if our
62 * source ip address hasn't changed.
63 * - `COOKIE_UNSUPPORTED_TIMEOUT`: 300s (5 minutes)
64 * - If a server responds without a cookie in the reply, this is how long to
65 * wait before attempting to send a client cookie again.
66 * - `COOKIE_REGRESSION_TIMEOUT`: 120s (2 minutes)
67 * - If a server was once known to return cookies, and all of a sudden stops
68 * returning cookies (but the reply is otherwise valid), this is how long
69 * to continue to attempt to use cookies before giving up and resetting.
70 * Such an event would cause an outage for this duration, but since a
71 * cache poisoning attack should be dropping invalid replies we should be
72 * able to still get the valid reply and not assume it is a server
73 * regression just because we received replies without cookies.
74 * - `COOKIE_RESEND_MAX`: 3
75 * - Maximum times to resend a query to a server due to the server responding
76 * with `BAD_COOKIE`, after this, we switch to TCP.
77 *
78 * ### Per-server variables:
79 * - `cookie.state`: Known state of cookie support, enumeration.
80 * - `INITIAL` (0): Initial state, not yet determined. Used during startup.
81 * - `GENERATED` (1): Cookie has been generated and sent to a server, but no
82 * validated response yet.
83 * - `SUPPORTED` (2): Server has been determined to properly support cookies
84 * - `UNSUPPORTED` (3): Server has been determined to not support cookies
85 * - `cookie.client` : 8 byte randomly generated client cookie
86 * - `cookie.client_ts`: Timestamp client cookie was generated
87 * - `cookie.client_ip`: IP address client used to connect to server
88 * - `cookie.server`: 8 to 32 byte server cookie
89 * - `cookie.server_len`: length of server cookie
90 * - `cookie.unsupported_ts`: Timestamp of last attempt to use a cookie, but
91 * it was determined that the server didn't support them.
92 *
93 * ### Per-query variables:
94 * - `query.client_cookie`: Duplicate of `cookie.client` at the point in time
95 * the query is put on the wire. This should be available in the
96 * `ares_dns_record_t` for the request for verification purposes so we don't
97 * actually need to duplicate this, just naming it here for the ease of
98 * documentation below.
99 * - `query.cookie_try_count`: Number of tries to send a cookie but receive
100 * `BAD_COOKIE` responses. Used to know when we need to switch to TCP.
101 *
102 * ### Procedure:
103 * **NOTE**: These steps will all be done after obtaining a connection handle as
104 * some of these steps depend on determining the source ip address for the
105 * connection.
106 *
107 * 1. If the query is not using EDNS, then **skip any remaining processing**.
108 * 2. If using TCP, ensure there is no EDNS cookie opt (10) set (there may have
109 * been if this is a resend after upgrade to TCP), then **skip any remaining
110 * processing**.
111 * 3. If `cookie.state == SUPPORTED`, `cookie.unsupported_ts` is non-zero, and
112 * evaluates greater than `COOKIE_REGRESSION_TIMEOUT`, then clear all cookie
113 * settings, set `cookie.state = INITIAL`. Continue to next step (4)
114 * 4. If `cookie.state == UNSUPPORTED`
115 * - If `cookie.unsupported_ts` evaluates less than
116 * `COOKIE_UNSUPPORTED_TIMEOUT`
117 * - Ensure there is no EDNS cookie opt (10) set (shouldn't be unless
118 * requester had put this themselves), then **skip any remaining
119 * processing** as we don't want to try to send cookies.
120 * - Otherwise:
121 * - clear all cookie settings, set `cookie.state = INITIAL`.
122 * - Continue to next step (5) which will send a new cookie.
123 * 5. If `cookie.state == INITIAL`:
124 * - randomly generate new `cookie.client`
125 * - set `cookie.client_ts` to the current timestamp.
126 * - set `cookie.state = GENERATED`.
127 * - set `cookie.client_ip` to the current source ip address.
128 * 6. If `cookie.state == GENERATED || cookie.state == SUPPORTED` and
129 * `cookie.client_ip` does not match the current source ip address:
130 * - clear `cookie.server`
131 * - randomly generate new `cookie.client`
132 * - set `cookie.client_ts` to the current timestamp.
133 * - set `cookie.client_ip` to the current source ip address.
134 * - do not change the `cookie.state`
135 * 7. If `cookie.state == SUPPORTED` and `cookie.client_ts` evaluation exceeds
136 * `COOKIE_CLIENT_TIMEOUT`:
137 * - clear `cookie.server`
138 * - randomly generate new `cookie.client`
139 * - set `cookie.client_ts` to the current timestamp.
140 * - set `cookie.client_ip` to the current source ip address.
141 * - do not change the `cookie.state`
142 * 8. Generate EDNS OPT record (10) for client cookie. The option value will be
143 * the `cookie.client` concatenated with the `cookie.server`. If there is no
144 * known server cookie, it will not be appended. Copy `cookie.client` to
145 * `query.client_cookie` to handle possible client cookie changes by other
146 * queries before a reply is received (technically this is in the cached
147 * `ares_dns_record_t` so no need to manually do this). Send request to
148 * server.
149 * 9. Evaluate response:
150 * 1. If invalid EDNS OPT cookie (10) length sent back in response (valid
151 * length is 16-40), or bad client cookie value (validate first 8 bytes
152 * against `query.client_cookie` not `cookie.client`), **drop response**
153 * as if it hadn't been received. This is likely a spoofing attack.
154 * Wait for valid response up to normal response timeout.
155 * 2. If a EDNS OPT cookie (10) server cookie is returned:
156 * - set `cookie.unsupported_ts` to zero and `cookie.state = SUPPORTED`.
157 * We can confirm this server supports cookies based on the existence
158 * of this record.
159 * - If a new EDNS OPT cookie (10) server cookie is in the response, and
160 * the `cookie.client` matches the `query.client_cookie` still (hasn't
161 * been rotated by some other parallel query), save it as
162 * `cookie.server`.
163 * 3. If dns response `rcode` is `BAD_COOKIE`:
164 * - Ensure an EDNS OPT cookie (10) is returned, otherwise **drop
165 * response**, this is completely invalid and likely a spoof of some
166 * sort.
167 * - Otherwise
168 * - Increment `query.cookie_try_count`
169 * - If `query.cookie_try_count >= COOKIE_RESEND_MAX`, set
170 * `query.using_tcp` to force the next attempt to use TCP.
171 * - **Requeue the query**, but do not increment the normal
172 * `try_count` as a `BAD_COOKIE` reply isn't a normal try failure.
173 * This should end up going all the way back to step 1 on the next
174 * attempt.
175 * 4. If EDNS OPT cookie (10) is **NOT** returned in the response:
176 * - If `cookie.state == SUPPORTED`
177 * - if `cookie.unsupported_ts` is zero, set to the current timestamp.
178 * - Drop the response, wait for a valid response to be returned
179 * - if `cookie.state == GENERATED`
180 * - clear all cookie settings
181 * - set `cookie.state = UNSUPPORTED`
182 * - set `cookie.unsupported_ts` to the current time
183 * - Accept response (state should be `UNSUPPORTED` if we're here)
184 */
185
186 #include "ares_private.h"
187
/* All durations below are expressed in milliseconds. */

/* How often the per-server client cookie is regenerated: 1 day */
#define COOKIE_CLIENT_TIMEOUT_MS (24 * 60 * 60 * 1000)

/* How long to wait before retrying cookies with a server that replied
 * without one: 5 minutes */
#define COOKIE_UNSUPPORTED_TIMEOUT_MS (5 * 60 * 1000)

/* How long to keep insisting on cookies after a previously cookie-capable
 * server stops returning them: 2 minutes */
#define COOKIE_REGRESSION_TIMEOUT_MS (2 * 60 * 1000)

/* Number of BADCOOKIE replies tolerated for a query before switching to TCP */
#define COOKIE_RESEND_MAX 3
198
199 static const unsigned char *
ares_dns_cookie_fetch(const ares_dns_record_t * dnsrec,size_t * len)200 ares_dns_cookie_fetch(const ares_dns_record_t *dnsrec, size_t *len)
201 {
202 const ares_dns_rr_t *rr = ares_dns_get_opt_rr_const(dnsrec);
203 const unsigned char *val = NULL;
204 *len = 0;
205
206 if (rr == NULL) {
207 return NULL;
208 }
209
210 if (!ares_dns_rr_get_opt_byid(rr, ARES_RR_OPT_OPTIONS, ARES_OPT_PARAM_COOKIE,
211 &val, len)) {
212 return NULL;
213 }
214
215 return val;
216 }
217
timeval_is_set(const ares_timeval_t * tv)218 static ares_bool_t timeval_is_set(const ares_timeval_t *tv)
219 {
220 if (tv->sec != 0 && tv->usec != 0) {
221 return ARES_TRUE;
222 }
223 return ARES_FALSE;
224 }
225
timeval_expired(const ares_timeval_t * tv,const ares_timeval_t * now,unsigned long millsecs)226 static ares_bool_t timeval_expired(const ares_timeval_t *tv,
227 const ares_timeval_t *now,
228 unsigned long millsecs)
229 {
230 ares_int64_t tvdiff_ms;
231 ares_timeval_t tvdiff;
232 ares_timeval_diff(&tvdiff, tv, now);
233
234 tvdiff_ms = tvdiff.sec * 1000 + tvdiff.usec / 1000;
235 if (tvdiff_ms >= (ares_int64_t)millsecs) {
236 return ARES_TRUE;
237 }
238 return ARES_FALSE;
239 }
240
/* Wipe every field of the cookie tracking structure and put it back into the
 * ARES_COOKIE_INITIAL state.
 *
 *   cookie - cookie state to reset
 */
static void ares_cookie_clear(ares_cookie_t *cookie)
{
  /* Zero everything first, then set the state explicitly rather than relying
   * on the numeric value of the enumeration */
  memset(cookie, 0, sizeof(ares_cookie_t));
  cookie->state = ARES_COOKIE_INITIAL;
}
246
/* Create a fresh client cookie for the server behind `conn`.
 *
 * Stores `now` as the generation timestamp, stores the connection's
 * `self_ip` as the address the cookie was generated for, and fills
 * `cookie->client` with random bytes drawn from the channel's random state.
 * Neither the cookie state nor the server cookie is modified here.
 *
 *   cookie - cookie state to update
 *   conn   - connection the cookie will be used on
 *   now    - current timestamp
 */
static void ares_cookie_generate(ares_cookie_t *cookie, ares_conn_t *conn,
                                 const ares_timeval_t *now)
{
  memcpy(&cookie->client_ts, now, sizeof(cookie->client_ts));
  memcpy(&cookie->client_ip, &conn->self_ip, sizeof(cookie->client_ip));

  ares_rand_bytes(conn->server->channel->rand_state, cookie->client,
                  sizeof(cookie->client));
}
256
/* Forget the server cookie: its length becomes zero and its storage is
 * zero-filled.  All other cookie fields are left as they are.
 *
 *   cookie - cookie state to update
 */
static void ares_cookie_clear_server(ares_cookie_t *cookie)
{
  cookie->server_len = 0;
  memset(cookie->server, 0, sizeof(cookie->server));
}
262
ares_addr_equal(const struct ares_addr * addr1,const struct ares_addr * addr2)263 static ares_bool_t ares_addr_equal(const struct ares_addr *addr1,
264 const struct ares_addr *addr2)
265 {
266 if (addr1->family != addr2->family) {
267 return ARES_FALSE;
268 }
269
270 switch (addr1->family) {
271 case AF_INET:
272 if (memcmp(&addr1->addr.addr4, &addr2->addr.addr4,
273 sizeof(addr1->addr.addr4)) == 0) {
274 return ARES_TRUE;
275 }
276 break;
277 case AF_INET6:
278 /* This structure is weird, and due to padding SonarCloud complains if
279 * you don't punch all the way down. At some point we should rework
280 * this structure */
281 if (memcmp(&addr1->addr.addr6._S6_un._S6_u8,
282 &addr2->addr.addr6._S6_un._S6_u8,
283 sizeof(addr1->addr.addr6._S6_un._S6_u8)) == 0) {
284 return ARES_TRUE;
285 }
286 break;
287 default:
288 break; /* LCOV_EXCL_LINE */
289 }
290
291 return ARES_FALSE;
292 }
293
ares_cookie_apply(ares_dns_record_t * dnsrec,ares_conn_t * conn,const ares_timeval_t * now)294 ares_status_t ares_cookie_apply(ares_dns_record_t *dnsrec, ares_conn_t *conn,
295 const ares_timeval_t *now)
296 {
297 ares_server_t *server = conn->server;
298 ares_cookie_t *cookie = &server->cookie;
299 ares_dns_rr_t *rr = ares_dns_get_opt_rr(dnsrec);
300 unsigned char c[40];
301 size_t c_len;
302
303 /* If there is no OPT record, then EDNS isn't supported, and therefore
304 * cookies can't be supported */
305 if (rr == NULL) {
306 return ARES_SUCCESS;
307 }
308
309 /* No cookies on TCP, make sure we remove one if one is present */
310 if (conn->flags & ARES_CONN_FLAG_TCP) {
311 ares_dns_rr_del_opt_byid(rr, ARES_RR_OPT_OPTIONS, ARES_OPT_PARAM_COOKIE);
312 return ARES_SUCCESS;
313 }
314
315 /* Look for regression */
316 if (cookie->state == ARES_COOKIE_SUPPORTED &&
317 timeval_is_set(&cookie->unsupported_ts) &&
318 timeval_expired(&cookie->unsupported_ts, now,
319 COOKIE_REGRESSION_TIMEOUT_MS)) {
320 ares_cookie_clear(cookie);
321 }
322
323 /* Handle unsupported state */
324 if (cookie->state == ARES_COOKIE_UNSUPPORTED) {
325 /* If timer hasn't expired, just delete any possible cookie and return */
326 if (!timeval_expired(&cookie->unsupported_ts, now,
327 COOKIE_REGRESSION_TIMEOUT_MS)) {
328 ares_dns_rr_del_opt_byid(rr, ARES_RR_OPT_OPTIONS, ARES_OPT_PARAM_COOKIE);
329 return ARES_SUCCESS;
330 }
331
332 /* We want to try to "learn" again */
333 ares_cookie_clear(cookie);
334 }
335
336 /* Generate a new cookie */
337 if (cookie->state == ARES_COOKIE_INITIAL) {
338 ares_cookie_generate(cookie, conn, now);
339 cookie->state = ARES_COOKIE_GENERATED;
340 }
341
342 /* Regenerate the cookie and clear the server cookie if the client ip has
343 * changed */
344 if ((cookie->state == ARES_COOKIE_GENERATED ||
345 cookie->state == ARES_COOKIE_SUPPORTED) &&
346 !ares_addr_equal(&conn->self_ip, &cookie->client_ip)) {
347 ares_cookie_clear_server(cookie);
348 ares_cookie_generate(cookie, conn, now);
349 }
350
351 /* If the client cookie has reached its maximum time, refresh it */
352 if (cookie->state == ARES_COOKIE_SUPPORTED &&
353 timeval_expired(&cookie->client_ts, now, COOKIE_CLIENT_TIMEOUT_MS)) {
354 ares_cookie_clear_server(cookie);
355 ares_cookie_generate(cookie, conn, now);
356 }
357
358 /* Generate the full cookie which is the client cookie concatenated with the
359 * server cookie (if there is one) and apply it. */
360 memcpy(c, cookie->client, sizeof(cookie->client));
361 if (cookie->server_len) {
362 memcpy(c + sizeof(cookie->client), cookie->server, cookie->server_len);
363 }
364 c_len = sizeof(cookie->client) + cookie->server_len;
365
366 return ares_dns_rr_set_opt(rr, ARES_RR_OPT_OPTIONS, ARES_OPT_PARAM_COOKIE, c,
367 c_len);
368 }
369
ares_cookie_validate(ares_query_t * query,const ares_dns_record_t * dnsresp,ares_conn_t * conn,const ares_timeval_t * now,ares_array_t ** requeue)370 ares_status_t ares_cookie_validate(ares_query_t *query,
371 const ares_dns_record_t *dnsresp,
372 ares_conn_t *conn, const ares_timeval_t *now,
373 ares_array_t **requeue)
374 {
375 ares_server_t *server = conn->server;
376 ares_cookie_t *cookie = &server->cookie;
377 const ares_dns_record_t *dnsreq = query->query;
378 const unsigned char *resp_cookie;
379 size_t resp_cookie_len;
380 const unsigned char *req_cookie;
381 size_t req_cookie_len;
382
383 resp_cookie = ares_dns_cookie_fetch(dnsresp, &resp_cookie_len);
384
385 /* Invalid cookie length, drop */
386 if (resp_cookie && (resp_cookie_len < 8 || resp_cookie_len > 40)) {
387 return ARES_EBADRESP;
388 }
389
390 req_cookie = ares_dns_cookie_fetch(dnsreq, &req_cookie_len);
391
392 /* Didn't request cookies, so we can stop evaluating */
393 if (req_cookie == NULL) {
394 return ARES_SUCCESS;
395 }
396
397 /* If 8-byte prefix for returned cookie doesn't match the requested cookie,
398 * drop for spoofing */
399 if (resp_cookie && memcmp(req_cookie, resp_cookie, 8) != 0) {
400 return ARES_EBADRESP;
401 }
402
403 if (resp_cookie && resp_cookie_len > 8) {
404 /* Make sure we record that we successfully received a cookie response */
405 cookie->state = ARES_COOKIE_SUPPORTED;
406 memset(&cookie->unsupported_ts, 0, sizeof(cookie->unsupported_ts));
407
408 /* If client cookie hasn't been rotated, save the returned server cookie */
409 if (memcmp(cookie->client, req_cookie, sizeof(cookie->client)) == 0) {
410 cookie->server_len = resp_cookie_len - 8;
411 memcpy(cookie->server, resp_cookie + 8, cookie->server_len);
412 }
413 }
414
415 if (ares_dns_record_get_rcode(dnsresp) == ARES_RCODE_BADCOOKIE) {
416 /* Illegal to return BADCOOKIE but no cookie, drop */
417 if (resp_cookie == NULL) {
418 return ARES_EBADRESP;
419 }
420
421 /* If we have too many attempts to send a cookie, we need to requeue as
422 * tcp */
423 query->cookie_try_count++;
424 if (query->cookie_try_count >= COOKIE_RESEND_MAX) {
425 query->using_tcp = ARES_TRUE;
426 }
427
428 /* Resend the request, hopefully it will work the next time as we should
429 * have recorded a server cookie */
430 ares_requeue_query(query, now, ARES_SUCCESS,
431 ARES_FALSE /* Don't increment try count */, NULL,
432 requeue);
433
434 /* Parent needs to drop this response */
435 return ARES_EBADRESP;
436 }
437
438 /* We've got a response with a server cookie, and we've done all the
439 * evaluation we can, return success */
440 if (resp_cookie_len > 8) {
441 return ARES_SUCCESS;
442 }
443
444 if (cookie->state == ARES_COOKIE_SUPPORTED) {
445 /* If we're not currently tracking an error time yet, start */
446 if (!timeval_is_set(&cookie->unsupported_ts)) {
447 memcpy(&cookie->unsupported_ts, now, sizeof(cookie->unsupported_ts));
448 }
449 /* Drop it since we expected a cookie */
450 return ARES_EBADRESP;
451 }
452
453 if (cookie->state == ARES_COOKIE_GENERATED) {
454 ares_cookie_clear(cookie);
455 cookie->state = ARES_COOKIE_UNSUPPORTED;
456 memcpy(&cookie->unsupported_ts, now, sizeof(cookie->unsupported_ts));
457 }
458
459 /* Cookie state should be UNSUPPORTED if we're here */
460 return ARES_SUCCESS;
461 }
462