1 /* MIT License
2 *
3 * Copyright (c) 1998 Massachusetts Institute of Technology
4 * Copyright (c) 2010 Daniel Stenberg
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * SPDX-License-Identifier: MIT
26 */
27
28 #include "ares_setup.h"
29
30
31 #ifdef HAVE_STRINGS_H
32 # include <strings.h>
33 #endif
34 #ifdef HAVE_SYS_IOCTL_H
35 # include <sys/ioctl.h>
36 #endif
37 #ifdef NETWARE
38 # include <sys/filio.h>
39 #endif
40 #ifdef HAVE_STDINT_H
41 # include <stdint.h>
42 #endif
43
44 #include <assert.h>
45 #include <fcntl.h>
46 #include <limits.h>
47
48 #include "ares.h"
49 #include "ares_private.h"
50 #include "ares_nameser.h"
51 #include "ares_dns.h"
52
53 static ares_bool_t try_again(int errnum);
54 static void write_tcp_data(ares_channel_t *channel, fd_set *write_fds,
55 ares_socket_t write_fd);
56 static void read_packets(ares_channel_t *channel, fd_set *read_fds,
57 ares_socket_t read_fd, struct timeval *now);
58 static void process_timeouts(ares_channel_t *channel, struct timeval *now);
59 static ares_status_t process_answer(ares_channel_t *channel,
60 const unsigned char *abuf, size_t alen,
61 struct server_connection *conn,
62 ares_bool_t tcp, struct timeval *now);
63 static void handle_conn_error(struct server_connection *conn,
64 ares_bool_t critical_failure);
65
66 static ares_bool_t same_questions(const ares_dns_record_t *qrec,
67 const ares_dns_record_t *arec);
68 static ares_bool_t same_address(const struct sockaddr *sa,
69 const struct ares_addr *aa);
70 static void end_query(ares_channel_t *channel, struct query *query,
71 ares_status_t status, const unsigned char *abuf,
72 size_t alen);
73
server_increment_failures(struct server_state * server)74 static void server_increment_failures(struct server_state *server)
75 {
76 ares__slist_node_t *node;
77 const ares_channel_t *channel = server->channel;
78
79 node = ares__slist_node_find(channel->servers, server);
80 if (node == NULL) {
81 return;
82 }
83 server->consec_failures++;
84 ares__slist_node_reinsert(node);
85 }
86
server_set_good(struct server_state * server)87 static void server_set_good(struct server_state *server)
88 {
89 ares__slist_node_t *node;
90 const ares_channel_t *channel = server->channel;
91
92 if (!server->consec_failures) {
93 return;
94 }
95
96 node = ares__slist_node_find(channel->servers, server);
97 if (node == NULL) {
98 return;
99 }
100
101 server->consec_failures = 0;
102 ares__slist_node_reinsert(node);
103 }
104
105 /* return true if now is exactly check time or later */
ares__timedout(const struct timeval * now,const struct timeval * check)106 ares_bool_t ares__timedout(const struct timeval *now,
107 const struct timeval *check)
108 {
109 ares_int64_t secs = ((ares_int64_t)now->tv_sec - (ares_int64_t)check->tv_sec);
110
111 if (secs > 0) {
112 return ARES_TRUE; /* yes, timed out */
113 }
114 if (secs < 0) {
115 return ARES_FALSE; /* nope, not timed out */
116 }
117
118 /* if the full seconds were identical, check the sub second parts */
119 return ((ares_int64_t)now->tv_usec - (ares_int64_t)check->tv_usec) >= 0
120 ? ARES_TRUE
121 : ARES_FALSE;
122 }
123
/* Advance the time stored in `now` by `millisecs` milliseconds (in place).
 *
 * The millisecond count is split into whole seconds and a microsecond
 * remainder (always below one second). A single carry from tv_usec into
 * tv_sec is performed afterwards.
 */
static void timeadd(struct timeval *now, size_t millisecs)
{
  time_t whole_secs = (time_t)millisecs / 1000;
  time_t extra_usec = (time_t)((millisecs % 1000) * 1000);

  now->tv_sec  += whole_secs;
  now->tv_usec += extra_usec;

  if (now->tv_usec >= 1000000) {
    now->tv_usec -= 1000000;
    now->tv_sec  += 1;
  }
}
135
136 /*
137 * generic process function
138 */
processfds(ares_channel_t * channel,fd_set * read_fds,ares_socket_t read_fd,fd_set * write_fds,ares_socket_t write_fd)139 static void processfds(ares_channel_t *channel, fd_set *read_fds,
140 ares_socket_t read_fd, fd_set *write_fds,
141 ares_socket_t write_fd)
142 {
143 struct timeval now;
144
145 if (channel == NULL) {
146 return;
147 }
148
149 ares__channel_lock(channel);
150
151 now = ares__tvnow();
152 read_packets(channel, read_fds, read_fd, &now);
153 process_timeouts(channel, &now);
154 /* Write last as the other 2 operations might have triggered writes */
155 write_tcp_data(channel, write_fds, write_fd);
156
157 ares__channel_unlock(channel);
158 }
159
160 /* Something interesting happened on the wire, or there was a timeout.
161 * See what's up and respond accordingly.
162 */
ares_process(ares_channel_t * channel,fd_set * read_fds,fd_set * write_fds)163 void ares_process(ares_channel_t *channel, fd_set *read_fds, fd_set *write_fds)
164 {
165 processfds(channel, read_fds, ARES_SOCKET_BAD, write_fds, ARES_SOCKET_BAD);
166 }
167
168 /* Something interesting happened on the wire, or there was a timeout.
169 * See what's up and respond accordingly.
170 */
ares_process_fd(ares_channel_t * channel,ares_socket_t read_fd,ares_socket_t write_fd)171 void ares_process_fd(ares_channel_t *channel,
172 ares_socket_t read_fd, /* use ARES_SOCKET_BAD or valid
173 file descriptors */
174 ares_socket_t write_fd)
175 {
176 processfds(channel, NULL, read_fd, NULL, write_fd);
177 }
178
179 /* Return 1 if the specified error number describes a readiness error, or 0
180 * otherwise. This is mostly for HP-UX, which could return EAGAIN or
181 * EWOULDBLOCK. See this man page
182 *
183 * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?
184 * manpage=/usr/share/man/man2.Z/send.2
185 */
try_again(int errnum)186 static ares_bool_t try_again(int errnum)
187 {
188 #if !defined EWOULDBLOCK && !defined EAGAIN
189 # error "Neither EWOULDBLOCK nor EAGAIN defined"
190 #endif
191
192 #ifdef EWOULDBLOCK
193 if (errnum == EWOULDBLOCK) {
194 return ARES_TRUE;
195 }
196 #endif
197
198 #if defined EAGAIN && EAGAIN != EWOULDBLOCK
199 if (errnum == EAGAIN) {
200 return ARES_TRUE;
201 }
202 #endif
203
204 return ARES_FALSE;
205 }
206
207 /* If any TCP sockets select true for writing, write out queued data
208 * we have for them.
209 */
write_tcp_data(ares_channel_t * channel,fd_set * write_fds,ares_socket_t write_fd)210 static void write_tcp_data(ares_channel_t *channel, fd_set *write_fds,
211 ares_socket_t write_fd)
212 {
213 ares__slist_node_t *node;
214
215 if (!write_fds && (write_fd == ARES_SOCKET_BAD)) {
216 /* no possible action */
217 return;
218 }
219
220 for (node = ares__slist_node_first(channel->servers); node != NULL;
221 node = ares__slist_node_next(node)) {
222 struct server_state *server = ares__slist_node_val(node);
223 const unsigned char *data;
224 size_t data_len;
225 ares_ssize_t count;
226
227 /* Make sure server has data to send and is selected in write_fds or
228 write_fd. */
229 if (ares__buf_len(server->tcp_send) == 0 || server->tcp_conn == NULL) {
230 continue;
231 }
232
233 if (write_fds) {
234 if (!FD_ISSET(server->tcp_conn->fd, write_fds)) {
235 continue;
236 }
237 } else {
238 if (server->tcp_conn->fd != write_fd) {
239 continue;
240 }
241 }
242
243 if (write_fds) {
244 /* If there's an error and we close this socket, then open
245 * another with the same fd to talk to another server, then we
246 * don't want to think that it was the new socket that was
247 * ready. This is not disastrous, but is likely to result in
248 * extra system calls and confusion. */
249 FD_CLR(server->tcp_conn->fd, write_fds);
250 }
251
252 data = ares__buf_peek(server->tcp_send, &data_len);
253 count = ares__socket_write(channel, server->tcp_conn->fd, data, data_len);
254 if (count <= 0) {
255 if (!try_again(SOCKERRNO)) {
256 handle_conn_error(server->tcp_conn, ARES_TRUE);
257 }
258 continue;
259 }
260
261 /* Strip data written from the buffer */
262 ares__buf_consume(server->tcp_send, (size_t)count);
263
264 /* Notify state callback all data is written */
265 if (ares__buf_len(server->tcp_send) == 0) {
266 SOCK_STATE_CALLBACK(channel, server->tcp_conn->fd, 1, 0);
267 }
268 }
269 }
270
271 /* If any TCP socket selects true for reading, read some data,
272 * allocate a buffer if we finish reading the length word, and process
273 * a packet if we finish reading one.
274 */
read_tcp_data(ares_channel_t * channel,struct server_connection * conn,struct timeval * now)275 static void read_tcp_data(ares_channel_t *channel,
276 struct server_connection *conn, struct timeval *now)
277 {
278 ares_ssize_t count;
279 struct server_state *server = conn->server;
280
281 /* Fetch buffer to store data we are reading */
282 size_t ptr_len = 65535;
283 unsigned char *ptr;
284
285 ptr = ares__buf_append_start(server->tcp_parser, &ptr_len);
286
287 if (ptr == NULL) {
288 handle_conn_error(conn, ARES_FALSE /* not critical to connection */);
289 return; /* bail out on malloc failure. TODO: make this
290 function return error codes */
291 }
292
293 /* Read from socket */
294 count = ares__socket_recv(channel, conn->fd, ptr, ptr_len);
295 if (count <= 0) {
296 ares__buf_append_finish(server->tcp_parser, 0);
297 if (!(count == -1 && try_again(SOCKERRNO))) {
298 handle_conn_error(conn, ARES_TRUE);
299 }
300 return;
301 }
302
303 /* Record amount of data read */
304 ares__buf_append_finish(server->tcp_parser, (size_t)count);
305
306 /* Process all queued answers */
307 while (1) {
308 unsigned short dns_len = 0;
309 const unsigned char *data = NULL;
310 size_t data_len = 0;
311 ares_status_t status;
312
313 /* Tag so we can roll back */
314 ares__buf_tag(server->tcp_parser);
315
316 /* Read length indicator */
317 if (ares__buf_fetch_be16(server->tcp_parser, &dns_len) != ARES_SUCCESS) {
318 ares__buf_tag_rollback(server->tcp_parser);
319 break;
320 }
321
322 /* Not enough data for a full response yet */
323 if (ares__buf_consume(server->tcp_parser, dns_len) != ARES_SUCCESS) {
324 ares__buf_tag_rollback(server->tcp_parser);
325 break;
326 }
327
328 /* Can't fail except for misuse */
329 data = ares__buf_tag_fetch(server->tcp_parser, &data_len);
330 if (data == NULL) {
331 ares__buf_tag_clear(server->tcp_parser);
332 break;
333 }
334
335 /* Strip off 2 bytes length */
336 data += 2;
337 data_len -= 2;
338
339 /* We finished reading this answer; process it */
340 status = process_answer(channel, data, data_len, conn, ARES_TRUE, now);
341 if (status != ARES_SUCCESS) {
342 handle_conn_error(conn, ARES_TRUE);
343 return;
344 }
345
346 /* Since we processed the answer, clear the tag so space can be reclaimed */
347 ares__buf_tag_clear(server->tcp_parser);
348 }
349
350 ares__check_cleanup_conn(channel, conn);
351 }
352
/* Append `fd` to the growable socket array `*socketlist`.
 *
 * socketlist: address of the caller's array pointer; updated when the array
 *             has to be reallocated.
 * fd:         socket to store.
 * alloc_cnt:  in/out, number of elements the array currently has room for.
 * num:        in/out, number of elements currently stored.
 *
 * Returns 1 on success. Returns 0 when the array could not be grown; in that
 * case *socketlist, *alloc_cnt and *num are left unchanged and the caller
 * still owns the old array.
 */
static int socket_list_append(ares_socket_t **socketlist, ares_socket_t fd,
                              size_t *alloc_cnt, size_t *num)
{
  if (*num >= *alloc_cnt) {
    /* Grow by powers of 2 */
    size_t         new_alloc = (*alloc_cnt) << 1;
    /* Reallocate the array itself (*socketlist). Passing `socketlist` would
     * hand the address of the caller's pointer variable to the allocator,
     * which is undefined behaviour. */
    ares_socket_t *new_list =
      ares_realloc(*socketlist, new_alloc * sizeof(*new_list));
    if (new_list == NULL) {
      return 0;
    }
    *alloc_cnt  = new_alloc;
    *socketlist = new_list;
  }

  (*socketlist)[(*num)++] = fd;
  return 1;
}
371
channel_socket_list(const ares_channel_t * channel,size_t * num)372 static ares_socket_t *channel_socket_list(const ares_channel_t *channel,
373 size_t *num)
374 {
375 size_t alloc_cnt = 1 << 4;
376 ares_socket_t *out = ares_malloc(alloc_cnt * sizeof(*out));
377 ares__slist_node_t *snode;
378
379 *num = 0;
380
381 if (out == NULL) {
382 return NULL;
383 }
384
385 for (snode = ares__slist_node_first(channel->servers); snode != NULL;
386 snode = ares__slist_node_next(snode)) {
387 struct server_state *server = ares__slist_node_val(snode);
388 ares__llist_node_t *node;
389
390 for (node = ares__llist_node_first(server->connections); node != NULL;
391 node = ares__llist_node_next(node)) {
392 const struct server_connection *conn = ares__llist_node_val(node);
393
394 if (conn->fd == ARES_SOCKET_BAD) {
395 continue;
396 }
397
398 if (!socket_list_append(&out, conn->fd, &alloc_cnt, num)) {
399 goto fail;
400 }
401 }
402 }
403
404 return out;
405
406 fail:
407 ares_free(out);
408 *num = 0;
409 return NULL;
410 }
411
412 /* If any UDP sockets select true for reading, process them. */
read_udp_packets_fd(ares_channel_t * channel,struct server_connection * conn,struct timeval * now)413 static void read_udp_packets_fd(ares_channel_t *channel,
414 struct server_connection *conn,
415 struct timeval *now)
416 {
417 ares_ssize_t read_len;
418 unsigned char buf[MAXENDSSZ + 1];
419
420 #ifdef HAVE_RECVFROM
421 ares_socklen_t fromlen;
422
423 union {
424 struct sockaddr sa;
425 struct sockaddr_in sa4;
426 struct sockaddr_in6 sa6;
427 } from;
428
429 memset(&from, 0, sizeof(from));
430 #endif
431
432 /* To reduce event loop overhead, read and process as many
433 * packets as we can. */
434 do {
435 if (conn->fd == ARES_SOCKET_BAD) {
436 read_len = -1;
437 } else {
438 if (conn->server->addr.family == AF_INET) {
439 fromlen = sizeof(from.sa4);
440 } else {
441 fromlen = sizeof(from.sa6);
442 }
443 read_len = ares__socket_recvfrom(channel, conn->fd, (void *)buf,
444 sizeof(buf), 0, &from.sa, &fromlen);
445 }
446
447 if (read_len == 0) {
448 /* UDP is connectionless, so result code of 0 is a 0-length UDP
449 * packet, and not an indication the connection is closed like on
450 * tcp */
451 continue;
452 } else if (read_len < 0) {
453 if (try_again(SOCKERRNO)) {
454 break;
455 }
456
457 handle_conn_error(conn, ARES_TRUE);
458 return;
459 #ifdef HAVE_RECVFROM
460 } else if (!same_address(&from.sa, &conn->server->addr)) {
461 /* The address the response comes from does not match the address we
462 * sent the request to. Someone may be attempting to perform a cache
463 * poisoning attack. */
464 continue;
465 #endif
466
467 } else {
468 process_answer(channel, buf, (size_t)read_len, conn, ARES_FALSE, now);
469 }
470
471 /* Try to read again only if *we* set up the socket, otherwise it may be
472 * a blocking socket and would cause recvfrom to hang. */
473 } while (read_len >= 0 && channel->sock_funcs == NULL);
474
475 ares__check_cleanup_conn(channel, conn);
476 }
477
/* Read from every connection that was reported readable.
 *
 * channel:  channel to operate on.
 * read_fds: set of readable descriptors, or NULL when a single descriptor is
 *           given instead. Bits of sockets that get processed are cleared.
 * read_fd:  single readable descriptor, used only when read_fds is NULL;
 *           ARES_SOCKET_BAD means "none".
 * now:      current time, forwarded to the readers.
 *
 * TCP connections are handled by read_tcp_data(), UDP connections by
 * read_udp_packets_fd().
 */
static void read_packets(ares_channel_t *channel, fd_set *read_fds,
                         ares_socket_t read_fd, struct timeval *now)
{
  size_t                    i;
  ares_socket_t            *socketlist  = NULL;
  size_t                    num_sockets = 0;
  struct server_connection *conn        = NULL;
  ares__llist_node_t       *node        = NULL;

  if (!read_fds && (read_fd == ARES_SOCKET_BAD)) {
    /* no possible action */
    return;
  }

  /* Single socket specified */
  if (!read_fds) {
    node = ares__htable_asvp_get_direct(channel->connnode_by_socket, read_fd);
    if (node == NULL) {
      return;
    }

    conn = ares__llist_node_val(node);

    if (conn->is_tcp) {
      read_tcp_data(channel, conn, now);
    } else {
      read_udp_packets_fd(channel, conn, now);
    }

    return;
  }

  /* There is no good way to iterate across an fd_set, instead we must pull a
   * list of all known fds, and iterate across that checking against the fd_set.
   * On allocation failure the list is NULL with a count of 0, so the loop is
   * simply skipped. */
  socketlist = channel_socket_list(channel, &num_sockets);

  for (i = 0; i < num_sockets; i++) {
    if (!FD_ISSET(socketlist[i], read_fds)) {
      continue;
    }

    /* If there's an error and we close this socket, then open
     * another with the same fd to talk to another server, then we
     * don't want to think that it was the new socket that was
     * ready.  This is not disastrous, but is likely to result in
     * extra system calls and confusion. */
    FD_CLR(socketlist[i], read_fds);

    node =
      ares__htable_asvp_get_direct(channel->connnode_by_socket, socketlist[i]);
    if (node == NULL) {
      /* The socket is no longer known (it was snapshotted before earlier
       * iterations ran). Skip it; returning here, as the code used to do,
       * leaked socketlist. */
      continue;
    }

    conn = ares__llist_node_val(node);

    if (conn->is_tcp) {
      read_tcp_data(channel, conn, now);
    } else {
      read_udp_packets_fd(channel, conn, now);
    }
  }

  ares_free(socketlist);
}
544
545 /* If any queries have timed out, note the timeout and move them on. */
process_timeouts(ares_channel_t * channel,struct timeval * now)546 static void process_timeouts(ares_channel_t *channel, struct timeval *now)
547 {
548 ares__slist_node_t *node =
549 ares__slist_node_first(channel->queries_by_timeout);
550 while (node != NULL) {
551 struct query *query = ares__slist_node_val(node);
552 /* Node might be removed, cache next */
553 ares__slist_node_t *next = ares__slist_node_next(node);
554 struct server_connection *conn;
555 /* Since this is sorted, as soon as we hit a query that isn't timed out,
556 * break */
557 if (!ares__timedout(now, &query->timeout)) {
558 break;
559 }
560
561 query->error_status = ARES_ETIMEOUT;
562 query->timeouts++;
563
564 conn = query->conn;
565 server_increment_failures(conn->server);
566 ares__requeue_query(query, now);
567 ares__check_cleanup_conn(channel, conn);
568
569 node = next;
570 }
571 }
572
rewrite_without_edns(ares_dns_record_t * qdnsrec,struct query * query)573 static ares_status_t rewrite_without_edns(ares_dns_record_t *qdnsrec,
574 struct query *query)
575 {
576 ares_status_t status;
577 size_t i;
578 ares_bool_t found_opt_rr = ARES_FALSE;
579 unsigned char *msg = NULL;
580 size_t msglen = 0;
581
582 /* Find and remove the OPT RR record */
583 for (i = 0; i < ares_dns_record_rr_cnt(qdnsrec, ARES_SECTION_ADDITIONAL);
584 i++) {
585 const ares_dns_rr_t *rr;
586 rr = ares_dns_record_rr_get(qdnsrec, ARES_SECTION_ADDITIONAL, i);
587 if (ares_dns_rr_get_type(rr) == ARES_REC_TYPE_OPT) {
588 ares_dns_record_rr_del(qdnsrec, ARES_SECTION_ADDITIONAL, i);
589 found_opt_rr = ARES_TRUE;
590 break;
591 }
592 }
593
594 if (!found_opt_rr) {
595 status = ARES_EFORMERR;
596 goto done;
597 }
598
599 /* Rewrite the DNS message */
600 status = ares_dns_write(qdnsrec, &msg, &msglen);
601 if (status != ARES_SUCCESS) {
602 goto done;
603 }
604
605 ares_free(query->qbuf);
606 query->qbuf = msg;
607 query->qlen = msglen;
608
609 done:
610 return status;
611 }
612
613 /* Handle an answer from a server. This must NEVER cleanup the
614 * server connection! Return something other than ARES_SUCCESS to cause
615 * the connection to be terminated after this call. */
process_answer(ares_channel_t * channel,const unsigned char * abuf,size_t alen,struct server_connection * conn,ares_bool_t tcp,struct timeval * now)616 static ares_status_t process_answer(ares_channel_t *channel,
617 const unsigned char *abuf, size_t alen,
618 struct server_connection *conn,
619 ares_bool_t tcp, struct timeval *now)
620 {
621 struct query *query;
622 /* Cache these as once ares__send_query() gets called, it may end up
623 * invalidating the connection all-together */
624 struct server_state *server = conn->server;
625 ares_dns_record_t *rdnsrec = NULL;
626 ares_dns_record_t *qdnsrec = NULL;
627 ares_status_t status;
628
629 /* Parse the response */
630 status = ares_dns_parse(abuf, alen, 0, &rdnsrec);
631 if (status != ARES_SUCCESS) {
632 /* Malformations are never accepted */
633 status = ARES_EBADRESP;
634 goto cleanup;
635 }
636
637 /* Find the query corresponding to this packet. The queries are
638 * hashed/bucketed by query id, so this lookup should be quick.
639 */
640 query = ares__htable_szvp_get_direct(channel->queries_by_qid,
641 ares_dns_record_get_id(rdnsrec));
642 if (!query) {
643 /* We may have stopped listening for this query, that's ok */
644 status = ARES_SUCCESS;
645 goto cleanup;
646 }
647
648 /* Parse the question we sent as we use it to compare */
649 status = ares_dns_parse(query->qbuf, query->qlen, 0, &qdnsrec);
650 if (status != ARES_SUCCESS) {
651 end_query(channel, query, status, NULL, 0);
652 goto cleanup;
653 }
654
655 /* Both the query id and the questions must be the same. We will drop any
656 * replies that aren't for the same query as this is considered invalid. */
657 if (!same_questions(qdnsrec, rdnsrec)) {
658 /* Possible qid conflict due to delayed response, that's ok */
659 status = ARES_SUCCESS;
660 goto cleanup;
661 }
662
663 /* At this point we know we've received an answer for this query, so we should
664 * remove it from the connection's queue so we can possibly invalidate the
665 * connection. Delay cleaning up the connection though as we may enqueue
666 * something new. */
667 ares__llist_node_destroy(query->node_queries_to_conn);
668 query->node_queries_to_conn = NULL;
669
670 /* If we use EDNS and server answers with FORMERR without an OPT RR, the
671 * protocol extension is not understood by the responder. We must retry the
672 * query without EDNS enabled. */
673 if (ares_dns_record_get_rcode(rdnsrec) == ARES_RCODE_FORMERR &&
674 ares_dns_has_opt_rr(qdnsrec) && !ares_dns_has_opt_rr(rdnsrec)) {
675 status = rewrite_without_edns(qdnsrec, query);
676 if (status != ARES_SUCCESS) {
677 end_query(channel, query, status, NULL, 0);
678 goto cleanup;
679 }
680
681 ares__send_query(query, now);
682 status = ARES_SUCCESS;
683 goto cleanup;
684 }
685
686 /* If we got a truncated UDP packet and are not ignoring truncation,
687 * don't accept the packet, and switch the query to TCP if we hadn't
688 * done so already.
689 */
690 if (ares_dns_record_get_flags(rdnsrec) & ARES_FLAG_TC && !tcp &&
691 !(channel->flags & ARES_FLAG_IGNTC)) {
692 query->using_tcp = ARES_TRUE;
693 ares__send_query(query, now);
694 status = ARES_SUCCESS; /* Switched to TCP is ok */
695 goto cleanup;
696 }
697
698 /* If we aren't passing through all error packets, discard packets
699 * with SERVFAIL, NOTIMP, or REFUSED response codes.
700 */
701 if (!(channel->flags & ARES_FLAG_NOCHECKRESP)) {
702 ares_dns_rcode_t rcode = ares_dns_record_get_rcode(rdnsrec);
703 if (rcode == ARES_RCODE_SERVFAIL || rcode == ARES_RCODE_NOTIMP ||
704 rcode == ARES_RCODE_REFUSED) {
705 switch (rcode) {
706 case ARES_RCODE_SERVFAIL:
707 query->error_status = ARES_ESERVFAIL;
708 break;
709 case ARES_RCODE_NOTIMP:
710 query->error_status = ARES_ENOTIMP;
711 break;
712 case ARES_RCODE_REFUSED:
713 query->error_status = ARES_EREFUSED;
714 break;
715 default:
716 break;
717 }
718
719 server_increment_failures(server);
720 ares__requeue_query(query, now);
721
722 /* Should any of these cause a connection termination?
723 * Maybe SERVER_FAILURE? */
724 status = ARES_SUCCESS;
725 goto cleanup;
726 }
727 }
728
729 /* If cache insertion was successful, it took ownership. We ignore
730 * other cache insertion failures. */
731 if (ares_qcache_insert(channel, now, query, rdnsrec) == ARES_SUCCESS) {
732 rdnsrec = NULL;
733 }
734
735 server_set_good(server);
736 end_query(channel, query, ARES_SUCCESS, abuf, alen);
737
738 status = ARES_SUCCESS;
739
740 cleanup:
741 ares_dns_record_destroy(rdnsrec);
742 ares_dns_record_destroy(qdnsrec);
743 return status;
744 }
745
/* React to an error on `conn` by closing the connection.
 *
 * critical_failure: when true, a failure is first recorded against the
 *                   connection's server. The original author's note says
 *                   this is done before closing so that work requeued by the
 *                   close is unlikely to land on the same server again.
 */
static void handle_conn_error(struct server_connection *conn,
                              ares_bool_t               critical_failure)
{
  if (critical_failure) {
    server_increment_failures(conn->server);
  }

  /* Per the original note, closing takes care of requeueing automatically */
  ares__close_connection(conn);
}
760
ares__requeue_query(struct query * query,struct timeval * now)761 ares_status_t ares__requeue_query(struct query *query, struct timeval *now)
762 {
763 ares_channel_t *channel = query->channel;
764 size_t max_tries = ares__slist_len(channel->servers) * channel->tries;
765
766 query->try_count++;
767
768 if (query->try_count < max_tries && !query->no_retries) {
769 return ares__send_query(query, now);
770 }
771
772 /* If we are here, all attempts to perform query failed. */
773 if (query->error_status == ARES_SUCCESS) {
774 query->error_status = ARES_ETIMEOUT;
775 }
776
777 end_query(channel, query, query->error_status, NULL, 0);
778 return ARES_ETIMEOUT;
779 }
780
781 /* Pick a random server from the list, we first get a random number in the
782 * range of the number of servers, then scan until we find that server in
783 * the list */
ares__random_server(ares_channel_t * channel)784 static struct server_state *ares__random_server(ares_channel_t *channel)
785 {
786 unsigned char c;
787 size_t cnt;
788 size_t idx;
789 ares__slist_node_t *node;
790 size_t num_servers = ares__slist_len(channel->servers);
791
792 /* Silence coverity, not possible */
793 if (num_servers == 0) {
794 return NULL;
795 }
796
797 ares__rand_bytes(channel->rand_state, &c, 1);
798
799 cnt = c;
800 idx = cnt % num_servers;
801
802 cnt = 0;
803 for (node = ares__slist_node_first(channel->servers); node != NULL;
804 node = ares__slist_node_next(node)) {
805 if (cnt == idx) {
806 return ares__slist_node_val(node);
807 }
808
809 cnt++;
810 }
811
812 return NULL;
813 }
814
ares__append_tcpbuf(struct server_state * server,const struct query * query)815 static ares_status_t ares__append_tcpbuf(struct server_state *server,
816 const struct query *query)
817 {
818 ares_status_t status;
819
820 status = ares__buf_append_be16(server->tcp_send, (unsigned short)query->qlen);
821 if (status != ARES_SUCCESS) {
822 return status;
823 }
824 return ares__buf_append(server->tcp_send, query->qbuf, query->qlen);
825 }
826
ares__calc_query_timeout(const struct query * query)827 static size_t ares__calc_query_timeout(const struct query *query)
828 {
829 const ares_channel_t *channel = query->channel;
830 size_t timeplus = channel->timeout;
831 size_t rounds;
832 size_t num_servers = ares__slist_len(channel->servers);
833
834 if (num_servers == 0) {
835 return 0;
836 }
837
838 /* For each trip through the entire server list, we want to double the
839 * retry from the last retry */
840 rounds = (query->try_count / num_servers);
841 if (rounds > 0) {
842 timeplus <<= rounds;
843 }
844
845 if (channel->maxtimeout && timeplus > channel->maxtimeout) {
846 timeplus = channel->maxtimeout;
847 }
848
849 /* Add some jitter to the retry timeout.
850 *
851 * Jitter is needed in situation when resolve requests are performed
852 * simultaneously from multiple hosts and DNS server throttle these requests.
853 * Adding randomness allows to avoid synchronisation of retries.
854 *
855 * Value of timeplus adjusted randomly to the range [0.5 * timeplus,
856 * timeplus].
857 */
858 if (rounds > 0) {
859 unsigned short r;
860 float delta_multiplier;
861
862 ares__rand_bytes(channel->rand_state, (unsigned char *)&r, sizeof(r));
863 delta_multiplier = ((float)r / USHRT_MAX) * 0.5f;
864 timeplus -= (size_t)((float)timeplus * delta_multiplier);
865 }
866
867 /* We want explicitly guarantee that timeplus is greater or equal to timeout
868 * specified in channel options. */
869 if (timeplus < channel->timeout) {
870 timeplus = channel->timeout;
871 }
872
873 return timeplus;
874 }
875
/* Send (or re-send) `query` to a server and register it for timeout and
 * per-connection tracking.
 *
 * The server is picked at random when channel->rotate is set, otherwise the
 * first server of the list is used. TCP queries are appended to the server's
 * TCP send buffer (opening the TCP connection if needed); UDP queries are
 * written directly to a UDP connection, opening a new one when none is
 * usable.
 *
 * Returns ARES_SUCCESS when the query is in flight. On failure the query has
 * either been requeued (the requeue status is returned) or ended, and the
 * corresponding error status is returned. A connection opened by this call
 * is closed again on the failure paths after it was obtained; pre-existing
 * connections are left alone.
 */
ares_status_t ares__send_query(struct query *query, struct timeval *now)
{
  ares_channel_t           *channel     = query->channel;
  struct server_state      *server;
  struct server_connection *conn        = NULL;
  size_t                    timeplus;
  ares_status_t             status;
  ares_bool_t               opened_conn = ARES_FALSE;

  query->conn = NULL;

  /* Pick the target server */
  if (channel->rotate) {
    server = ares__random_server(channel);
  } else {
    server = ares__slist_first_val(channel->servers);
  }

  if (server == NULL) {
    end_query(channel, query, ARES_ENOSERVER /* ? */, NULL, 0);
    return ARES_ENOSERVER;
  }

  if (query->using_tcp) {
    size_t prior_len;

    /* Ensure the server has a TCP connection, then queue the request */
    if (server->tcp_conn == NULL) {
      opened_conn = ARES_TRUE;
      status      = ares__open_connection(channel, server, ARES_TRUE);

      /* Server-specific errors: count the failure and retry elsewhere */
      if (status == ARES_ECONNREFUSED || status == ARES_EBADFAMILY) {
        server_increment_failures(server);
        query->error_status = status;
        return ares__requeue_query(query, now);
      }

      /* Anything else is not retryable, likely ENOMEM */
      if (status != ARES_SUCCESS) {
        end_query(channel, query, status, NULL, 0);
        return status;
      }
    }

    conn      = server->tcp_conn;
    prior_len = ares__buf_len(server->tcp_send);

    status = ares__append_tcpbuf(server, query);
    if (status != ARES_SUCCESS) {
      goto fail_end_query;
    }

    /* The send buffer was empty before: signal the state callback */
    if (prior_len == 0) {
      SOCK_STATE_CALLBACK(channel, conn->fd, 1, 1);
    }
  } else {
    ares__llist_node_t *node = ares__llist_node_first(server->connections);

    /* The first connection is unusable when it is the TCP connection or when
     * it already carried the maximum number of UDP queries */
    if (node != NULL) {
      conn = ares__llist_node_val(node);
      if (conn->is_tcp || (channel->udp_max_queries > 0 &&
                           conn->total_queries >= channel->udp_max_queries)) {
        node = NULL;
      }
    }

    if (node == NULL) {
      opened_conn = ARES_TRUE;
      status      = ares__open_connection(channel, server, ARES_FALSE);

      /* Server-specific errors: count the failure and retry elsewhere */
      if (status == ARES_ECONNREFUSED || status == ARES_EBADFAMILY) {
        server_increment_failures(server);
        query->error_status = status;
        return ares__requeue_query(query, now);
      }

      /* Anything else is not retryable, likely ENOMEM */
      if (status != ARES_SUCCESS) {
        end_query(channel, query, status, NULL, 0);
        return status;
      }

      node = ares__llist_node_first(server->connections);
    }

    conn = ares__llist_node_val(node);
    if (ares__socket_write(channel, conn->fd, query->qbuf, query->qlen) == -1) {
      /* FIXME: Handle EAGAIN here since it likely can happen. */
      server_increment_failures(server);
      status = ares__requeue_query(query, now);

      /* Only a connection opened by this call may be closed here; an older
       * one is left for later cleanup */
      if (opened_conn) {
        ares__close_connection(conn);
      }

      return status;
    }
  }

  /* Register the query in the timeout-ordered list under its new deadline */
  timeplus = ares__calc_query_timeout(query);
  ares__slist_node_destroy(query->node_queries_by_timeout);
  query->timeout = *now;
  timeadd(&query->timeout, timeplus);
  query->node_queries_by_timeout =
    ares__slist_insert(channel->queries_by_timeout, query);
  if (query->node_queries_by_timeout == NULL) {
    status = ARES_ENOMEM;
    goto fail_end_query;
  }

  /* Register the query with the connection so errors on that connection can
   * be handled quickly */
  ares__llist_node_destroy(query->node_queries_to_conn);
  query->node_queries_to_conn =
    ares__llist_insert_last(conn->queries_to_conn, query);
  if (query->node_queries_to_conn == NULL) {
    status = ARES_ENOMEM;
    goto fail_end_query;
  }

  query->conn = conn;
  conn->total_queries++;
  return ARES_SUCCESS;

fail_end_query:
  /* Shared failure tail: end the query with `status`, then close the
   * connection only if this call opened it (an older connection is cleaned
   * up elsewhere) */
  end_query(channel, query, status, NULL, 0);
  if (opened_conn) {
    ares__close_connection(conn);
  }
  return status;
}
1043
same_questions(const ares_dns_record_t * qrec,const ares_dns_record_t * arec)1044 static ares_bool_t same_questions(const ares_dns_record_t *qrec,
1045 const ares_dns_record_t *arec)
1046 {
1047 size_t i;
1048 ares_bool_t rv = ARES_FALSE;
1049
1050
1051 if (ares_dns_record_query_cnt(qrec) != ares_dns_record_query_cnt(arec)) {
1052 goto done;
1053 }
1054
1055 for (i = 0; i < ares_dns_record_query_cnt(qrec); i++) {
1056 const char *qname = NULL;
1057 const char *aname = NULL;
1058 ares_dns_rec_type_t qtype;
1059 ares_dns_rec_type_t atype;
1060 ares_dns_class_t qclass;
1061 ares_dns_class_t aclass;
1062
1063 if (ares_dns_record_query_get(qrec, i, &qname, &qtype, &qclass) !=
1064 ARES_SUCCESS ||
1065 qname == NULL) {
1066 goto done;
1067 }
1068
1069 if (ares_dns_record_query_get(arec, i, &aname, &atype, &aclass) !=
1070 ARES_SUCCESS ||
1071 aname == NULL) {
1072 goto done;
1073 }
1074 if (strcasecmp(qname, aname) != 0 || qtype != atype || qclass != aclass) {
1075 goto done;
1076 }
1077 }
1078
1079 rv = ARES_TRUE;
1080
1081 done:
1082 return rv;
1083 }
1084
same_address(const struct sockaddr * sa,const struct ares_addr * aa)1085 static ares_bool_t same_address(const struct sockaddr *sa,
1086 const struct ares_addr *aa)
1087 {
1088 const void *addr1;
1089 const void *addr2;
1090
1091 if (sa->sa_family == aa->family) {
1092 switch (aa->family) {
1093 case AF_INET:
1094 addr1 = &aa->addr.addr4;
1095 addr2 = &(CARES_INADDR_CAST(struct sockaddr_in *, sa))->sin_addr;
1096 if (memcmp(addr1, addr2, sizeof(aa->addr.addr4)) == 0) {
1097 return ARES_TRUE; /* match */
1098 }
1099 break;
1100 case AF_INET6:
1101 addr1 = &aa->addr.addr6;
1102 addr2 = &(CARES_INADDR_CAST(struct sockaddr_in6 *, sa))->sin6_addr;
1103 if (memcmp(addr1, addr2, sizeof(aa->addr.addr6)) == 0) {
1104 return ARES_TRUE; /* match */
1105 }
1106 break;
1107 default:
1108 break; /* LCOV_EXCL_LINE */
1109 }
1110 }
1111 return ARES_FALSE; /* different */
1112 }
1113
ares_detach_query(struct query * query)1114 static void ares_detach_query(struct query *query)
1115 {
1116 /* Remove the query from all the lists in which it is linked */
1117 ares__htable_szvp_remove(query->channel->queries_by_qid, query->qid);
1118 ares__slist_node_destroy(query->node_queries_by_timeout);
1119 ares__llist_node_destroy(query->node_queries_to_conn);
1120 ares__llist_node_destroy(query->node_all_queries);
1121 query->node_queries_by_timeout = NULL;
1122 query->node_queries_to_conn = NULL;
1123 query->node_all_queries = NULL;
1124 }
1125
/* Finish a query: hand the outcome to the query's callback, release the
 * query, then call ares_queue_notify_empty() for the channel.
 *
 *   channel - channel passed to ares_queue_notify_empty()
 *   query   - query being completed; it is freed by this function and must
 *             not be used afterwards
 *   status  - result code passed to the callback (cast to int)
 *   abuf    - answer buffer passed to the callback, may be NULL
 *   alen    - length of abuf passed to the callback (cast to int)
 */
static void end_query(ares_channel_t *channel, struct query *query,
                      ares_status_t status, const unsigned char *abuf,
                      size_t alen)
{
  /* The callback prototype takes a non-const buffer even though the buffer
   * isn't meant to be modified, so the const qualifier is cast off here
   * purely for compatibility with that prototype. */
  unsigned char *answer = (unsigned char *)((void *)((size_t)abuf));

  /* Report the result to the requester. */
  query->callback(query->arg, (int)status, (int)query->timeouts, answer,
                  (int)alen);

  ares__free_query(query);

  /* The empty-queue check has to run only after the callback has returned
   * and the query has been freed because:
   * 1) the callback may itself enqueue a new query, and
   * 2) the current query is not actually detached until it is freed.
   */
  ares_queue_notify_empty(channel);
}
1144
ares__free_query(struct query * query)1145 void ares__free_query(struct query *query)
1146 {
1147 ares_detach_query(query);
1148 /* Zero out some important stuff, to help catch bugs */
1149 query->callback = NULL;
1150 query->arg = NULL;
1151 /* Deallocate the memory associated with the query */
1152 ares_free(query->qbuf);
1153
1154 ares_free(query);
1155 }
1156