• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* MIT License
2  *
3  * Copyright (c) 1998 Massachusetts Institute of Technology
4  * Copyright (c) 2010 Daniel Stenberg
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * SPDX-License-Identifier: MIT
26  */
27 
28 #include "ares_setup.h"
29 
30 
31 #ifdef HAVE_STRINGS_H
32 #  include <strings.h>
33 #endif
34 #ifdef HAVE_SYS_IOCTL_H
35 #  include <sys/ioctl.h>
36 #endif
37 #ifdef NETWARE
38 #  include <sys/filio.h>
39 #endif
40 #ifdef HAVE_STDINT_H
41 #  include <stdint.h>
42 #endif
43 
44 #include <assert.h>
45 #include <fcntl.h>
46 #include <limits.h>
47 
48 #include "ares.h"
49 #include "ares_private.h"
50 #include "ares_nameser.h"
51 #include "ares_dns.h"
52 
53 static ares_bool_t try_again(int errnum);
54 static void        write_tcp_data(ares_channel_t *channel, fd_set *write_fds,
55                                   ares_socket_t write_fd);
56 static void        read_packets(ares_channel_t *channel, fd_set *read_fds,
57                                 ares_socket_t read_fd, struct timeval *now);
58 static void process_timeouts(ares_channel_t *channel, struct timeval *now);
59 static ares_status_t process_answer(ares_channel_t      *channel,
60                                     const unsigned char *abuf, size_t alen,
61                                     struct server_connection *conn,
62                                     ares_bool_t tcp, struct timeval *now);
63 static void          handle_conn_error(struct server_connection *conn,
64                                        ares_bool_t               critical_failure);
65 
66 static ares_bool_t   same_questions(const ares_dns_record_t *qrec,
67                                     const ares_dns_record_t *arec);
68 static ares_bool_t   same_address(const struct sockaddr  *sa,
69                                   const struct ares_addr *aa);
70 static void          end_query(ares_channel_t *channel, struct query *query,
71                                ares_status_t status, const unsigned char *abuf,
72                                size_t alen);
73 
server_increment_failures(struct server_state * server)74 static void          server_increment_failures(struct server_state *server)
75 {
76   ares__slist_node_t   *node;
77   const ares_channel_t *channel = server->channel;
78 
79   node = ares__slist_node_find(channel->servers, server);
80   if (node == NULL) {
81     return;
82   }
83   server->consec_failures++;
84   ares__slist_node_reinsert(node);
85 }
86 
server_set_good(struct server_state * server)87 static void server_set_good(struct server_state *server)
88 {
89   ares__slist_node_t   *node;
90   const ares_channel_t *channel = server->channel;
91 
92   if (!server->consec_failures) {
93     return;
94   }
95 
96   node = ares__slist_node_find(channel->servers, server);
97   if (node == NULL) {
98     return;
99   }
100 
101   server->consec_failures = 0;
102   ares__slist_node_reinsert(node);
103 }
104 
105 /* return true if now is exactly check time or later */
ares__timedout(const struct timeval * now,const struct timeval * check)106 ares_bool_t ares__timedout(const struct timeval *now,
107                            const struct timeval *check)
108 {
109   ares_int64_t secs = ((ares_int64_t)now->tv_sec - (ares_int64_t)check->tv_sec);
110 
111   if (secs > 0) {
112     return ARES_TRUE; /* yes, timed out */
113   }
114   if (secs < 0) {
115     return ARES_FALSE; /* nope, not timed out */
116   }
117 
118   /* if the full seconds were identical, check the sub second parts */
119   return ((ares_int64_t)now->tv_usec - (ares_int64_t)check->tv_usec) >= 0
120            ? ARES_TRUE
121            : ARES_FALSE;
122 }
123 
/* Advance the time stored in `now` by `millisecs` milliseconds, in place.
 *
 * The microsecond field is carried into the seconds field at most once,
 * which is sufficient as long as now->tv_usec was below one second on entry
 * (the added sub-second part is always below one second).
 */
static void timeadd(struct timeval *now, size_t millisecs)
{
  time_t add_sec  = (time_t)millisecs / 1000;
  time_t add_usec = (time_t)((millisecs % 1000) * 1000);

  now->tv_sec  += add_sec;
  now->tv_usec += add_usec;

  /* Carry a full second out of the microsecond field */
  if (now->tv_usec >= 1000000) {
    now->tv_usec -= 1000000;
    now->tv_sec++;
  }
}
135 
136 /*
137  * generic process function
138  */
processfds(ares_channel_t * channel,fd_set * read_fds,ares_socket_t read_fd,fd_set * write_fds,ares_socket_t write_fd)139 static void processfds(ares_channel_t *channel, fd_set *read_fds,
140                        ares_socket_t read_fd, fd_set *write_fds,
141                        ares_socket_t write_fd)
142 {
143   struct timeval now;
144 
145   if (channel == NULL) {
146     return;
147   }
148 
149   ares__channel_lock(channel);
150 
151   now = ares__tvnow();
152   read_packets(channel, read_fds, read_fd, &now);
153   process_timeouts(channel, &now);
154   /* Write last as the other 2 operations might have triggered writes */
155   write_tcp_data(channel, write_fds, write_fd);
156 
157   ares__channel_unlock(channel);
158 }
159 
160 /* Something interesting happened on the wire, or there was a timeout.
161  * See what's up and respond accordingly.
162  */
ares_process(ares_channel_t * channel,fd_set * read_fds,fd_set * write_fds)163 void ares_process(ares_channel_t *channel, fd_set *read_fds, fd_set *write_fds)
164 {
165   processfds(channel, read_fds, ARES_SOCKET_BAD, write_fds, ARES_SOCKET_BAD);
166 }
167 
168 /* Something interesting happened on the wire, or there was a timeout.
169  * See what's up and respond accordingly.
170  */
ares_process_fd(ares_channel_t * channel,ares_socket_t read_fd,ares_socket_t write_fd)171 void ares_process_fd(ares_channel_t *channel,
172                      ares_socket_t   read_fd, /* use ARES_SOCKET_BAD or valid
173                                                  file descriptors */
174                      ares_socket_t   write_fd)
175 {
176   processfds(channel, NULL, read_fd, NULL, write_fd);
177 }
178 
179 /* Return 1 if the specified error number describes a readiness error, or 0
180  * otherwise. This is mostly for HP-UX, which could return EAGAIN or
181  * EWOULDBLOCK. See this man page
182  *
183  * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?
184  *     manpage=/usr/share/man/man2.Z/send.2
185  */
try_again(int errnum)186 static ares_bool_t try_again(int errnum)
187 {
188 #if !defined EWOULDBLOCK && !defined EAGAIN
189 #  error "Neither EWOULDBLOCK nor EAGAIN defined"
190 #endif
191 
192 #ifdef EWOULDBLOCK
193   if (errnum == EWOULDBLOCK) {
194     return ARES_TRUE;
195   }
196 #endif
197 
198 #if defined EAGAIN && EAGAIN != EWOULDBLOCK
199   if (errnum == EAGAIN) {
200     return ARES_TRUE;
201   }
202 #endif
203 
204   return ARES_FALSE;
205 }
206 
207 /* If any TCP sockets select true for writing, write out queued data
208  * we have for them.
209  */
write_tcp_data(ares_channel_t * channel,fd_set * write_fds,ares_socket_t write_fd)210 static void write_tcp_data(ares_channel_t *channel, fd_set *write_fds,
211                            ares_socket_t write_fd)
212 {
213   ares__slist_node_t *node;
214 
215   if (!write_fds && (write_fd == ARES_SOCKET_BAD)) {
216     /* no possible action */
217     return;
218   }
219 
220   for (node = ares__slist_node_first(channel->servers); node != NULL;
221        node = ares__slist_node_next(node)) {
222     struct server_state *server = ares__slist_node_val(node);
223     const unsigned char *data;
224     size_t               data_len;
225     ares_ssize_t         count;
226 
227     /* Make sure server has data to send and is selected in write_fds or
228        write_fd. */
229     if (ares__buf_len(server->tcp_send) == 0 || server->tcp_conn == NULL) {
230       continue;
231     }
232 
233     if (write_fds) {
234       if (!FD_ISSET(server->tcp_conn->fd, write_fds)) {
235         continue;
236       }
237     } else {
238       if (server->tcp_conn->fd != write_fd) {
239         continue;
240       }
241     }
242 
243     if (write_fds) {
244       /* If there's an error and we close this socket, then open
245        * another with the same fd to talk to another server, then we
246        * don't want to think that it was the new socket that was
247        * ready. This is not disastrous, but is likely to result in
248        * extra system calls and confusion. */
249       FD_CLR(server->tcp_conn->fd, write_fds);
250     }
251 
252     data  = ares__buf_peek(server->tcp_send, &data_len);
253     count = ares__socket_write(channel, server->tcp_conn->fd, data, data_len);
254     if (count <= 0) {
255       if (!try_again(SOCKERRNO)) {
256         handle_conn_error(server->tcp_conn, ARES_TRUE);
257       }
258       continue;
259     }
260 
261     /* Strip data written from the buffer */
262     ares__buf_consume(server->tcp_send, (size_t)count);
263 
264     /* Notify state callback all data is written */
265     if (ares__buf_len(server->tcp_send) == 0) {
266       SOCK_STATE_CALLBACK(channel, server->tcp_conn->fd, 1, 0);
267     }
268   }
269 }
270 
271 /* If any TCP socket selects true for reading, read some data,
272  * allocate a buffer if we finish reading the length word, and process
273  * a packet if we finish reading one.
274  */
read_tcp_data(ares_channel_t * channel,struct server_connection * conn,struct timeval * now)275 static void read_tcp_data(ares_channel_t           *channel,
276                           struct server_connection *conn, struct timeval *now)
277 {
278   ares_ssize_t         count;
279   struct server_state *server = conn->server;
280 
281   /* Fetch buffer to store data we are reading */
282   size_t               ptr_len = 65535;
283   unsigned char       *ptr;
284 
285   ptr = ares__buf_append_start(server->tcp_parser, &ptr_len);
286 
287   if (ptr == NULL) {
288     handle_conn_error(conn, ARES_FALSE /* not critical to connection */);
289     return; /* bail out on malloc failure. TODO: make this
290                function return error codes */
291   }
292 
293   /* Read from socket */
294   count = ares__socket_recv(channel, conn->fd, ptr, ptr_len);
295   if (count <= 0) {
296     ares__buf_append_finish(server->tcp_parser, 0);
297     if (!(count == -1 && try_again(SOCKERRNO))) {
298       handle_conn_error(conn, ARES_TRUE);
299     }
300     return;
301   }
302 
303   /* Record amount of data read */
304   ares__buf_append_finish(server->tcp_parser, (size_t)count);
305 
306   /* Process all queued answers */
307   while (1) {
308     unsigned short       dns_len  = 0;
309     const unsigned char *data     = NULL;
310     size_t               data_len = 0;
311     ares_status_t        status;
312 
313     /* Tag so we can roll back */
314     ares__buf_tag(server->tcp_parser);
315 
316     /* Read length indicator */
317     if (ares__buf_fetch_be16(server->tcp_parser, &dns_len) != ARES_SUCCESS) {
318       ares__buf_tag_rollback(server->tcp_parser);
319       break;
320     }
321 
322     /* Not enough data for a full response yet */
323     if (ares__buf_consume(server->tcp_parser, dns_len) != ARES_SUCCESS) {
324       ares__buf_tag_rollback(server->tcp_parser);
325       break;
326     }
327 
328     /* Can't fail except for misuse */
329     data = ares__buf_tag_fetch(server->tcp_parser, &data_len);
330     if (data == NULL) {
331       ares__buf_tag_clear(server->tcp_parser);
332       break;
333     }
334 
335     /* Strip off 2 bytes length */
336     data     += 2;
337     data_len -= 2;
338 
339     /* We finished reading this answer; process it */
340     status = process_answer(channel, data, data_len, conn, ARES_TRUE, now);
341     if (status != ARES_SUCCESS) {
342       handle_conn_error(conn, ARES_TRUE);
343       return;
344     }
345 
346     /* Since we processed the answer, clear the tag so space can be reclaimed */
347     ares__buf_tag_clear(server->tcp_parser);
348   }
349 
350   ares__check_cleanup_conn(channel, conn);
351 }
352 
/* Append `fd` to the growable socket array `*socketlist`.
 *
 * socketlist: in/out, address of the caller's array pointer; updated when the
 *             array has to be reallocated.
 * alloc_cnt:  in/out, number of elements the array can currently hold.
 * num:        in/out, number of elements in use; incremented on success.
 *
 * Returns 1 on success and 0 when growing the array failed, in which case
 * the array, *alloc_cnt and *num are left unchanged and the caller still
 * owns the original array.
 */
static int socket_list_append(ares_socket_t **socketlist, ares_socket_t fd,
                              size_t *alloc_cnt, size_t *num)
{
  if (*num >= *alloc_cnt) {
    /* Grow by powers of 2; a zero capacity must still grow to something */
    size_t         new_alloc = (*alloc_cnt == 0) ? 1 : (*alloc_cnt) << 1;
    ares_socket_t *new_list;

    /* Resize the array itself (*socketlist).  Passing `socketlist` would hand
     * the address of the caller's pointer variable to the allocator, which
     * is not a heap block. */
    new_list = ares_realloc(*socketlist, new_alloc * sizeof(*new_list));
    if (new_list == NULL) {
      return 0;
    }
    *alloc_cnt  = new_alloc;
    *socketlist = new_list;
  }

  (*socketlist)[(*num)++] = fd;
  return 1;
}
371 
channel_socket_list(const ares_channel_t * channel,size_t * num)372 static ares_socket_t *channel_socket_list(const ares_channel_t *channel,
373                                           size_t               *num)
374 {
375   size_t              alloc_cnt = 1 << 4;
376   ares_socket_t      *out       = ares_malloc(alloc_cnt * sizeof(*out));
377   ares__slist_node_t *snode;
378 
379   *num = 0;
380 
381   if (out == NULL) {
382     return NULL;
383   }
384 
385   for (snode = ares__slist_node_first(channel->servers); snode != NULL;
386        snode = ares__slist_node_next(snode)) {
387     struct server_state *server = ares__slist_node_val(snode);
388     ares__llist_node_t  *node;
389 
390     for (node = ares__llist_node_first(server->connections); node != NULL;
391          node = ares__llist_node_next(node)) {
392       const struct server_connection *conn = ares__llist_node_val(node);
393 
394       if (conn->fd == ARES_SOCKET_BAD) {
395         continue;
396       }
397 
398       if (!socket_list_append(&out, conn->fd, &alloc_cnt, num)) {
399         goto fail;
400       }
401     }
402   }
403 
404   return out;
405 
406 fail:
407   ares_free(out);
408   *num = 0;
409   return NULL;
410 }
411 
412 /* If any UDP sockets select true for reading, process them. */
read_udp_packets_fd(ares_channel_t * channel,struct server_connection * conn,struct timeval * now)413 static void read_udp_packets_fd(ares_channel_t           *channel,
414                                 struct server_connection *conn,
415                                 struct timeval           *now)
416 {
417   ares_ssize_t  read_len;
418   unsigned char buf[MAXENDSSZ + 1];
419 
420 #ifdef HAVE_RECVFROM
421   ares_socklen_t fromlen;
422 
423   union {
424     struct sockaddr     sa;
425     struct sockaddr_in  sa4;
426     struct sockaddr_in6 sa6;
427   } from;
428 
429   memset(&from, 0, sizeof(from));
430 #endif
431 
432   /* To reduce event loop overhead, read and process as many
433    * packets as we can. */
434   do {
435     if (conn->fd == ARES_SOCKET_BAD) {
436       read_len = -1;
437     } else {
438       if (conn->server->addr.family == AF_INET) {
439         fromlen = sizeof(from.sa4);
440       } else {
441         fromlen = sizeof(from.sa6);
442       }
443       read_len = ares__socket_recvfrom(channel, conn->fd, (void *)buf,
444                                        sizeof(buf), 0, &from.sa, &fromlen);
445     }
446 
447     if (read_len == 0) {
448       /* UDP is connectionless, so result code of 0 is a 0-length UDP
449        * packet, and not an indication the connection is closed like on
450        * tcp */
451       continue;
452     } else if (read_len < 0) {
453       if (try_again(SOCKERRNO)) {
454         break;
455       }
456 
457       handle_conn_error(conn, ARES_TRUE);
458       return;
459 #ifdef HAVE_RECVFROM
460     } else if (!same_address(&from.sa, &conn->server->addr)) {
461       /* The address the response comes from does not match the address we
462        * sent the request to. Someone may be attempting to perform a cache
463        * poisoning attack. */
464       continue;
465 #endif
466 
467     } else {
468       process_answer(channel, buf, (size_t)read_len, conn, ARES_FALSE, now);
469     }
470 
471     /* Try to read again only if *we* set up the socket, otherwise it may be
472      * a blocking socket and would cause recvfrom to hang. */
473   } while (read_len >= 0 && channel->sock_funcs == NULL);
474 
475   ares__check_cleanup_conn(channel, conn);
476 }
477 
/* Read from every socket that was reported readable.
 *
 * Readability comes either from the fd_set `read_fds` or, when that is NULL,
 * from the single descriptor `read_fd`.  Each ready descriptor is mapped to
 * its connection through channel->connnode_by_socket and handed to the TCP
 * or UDP reader depending on conn->is_tcp.  Descriptors that no longer map
 * to a connection are skipped.
 */
static void read_packets(ares_channel_t *channel, fd_set *read_fds,
                         ares_socket_t read_fd, struct timeval *now)
{
  size_t                    i;
  ares_socket_t            *socketlist  = NULL;
  size_t                    num_sockets = 0;
  struct server_connection *conn        = NULL;
  ares__llist_node_t       *node        = NULL;

  if (!read_fds && (read_fd == ARES_SOCKET_BAD)) {
    /* no possible action */
    return;
  }

  /* Single socket specified */
  if (!read_fds) {
    node = ares__htable_asvp_get_direct(channel->connnode_by_socket, read_fd);
    if (node == NULL) {
      return;
    }

    conn = ares__llist_node_val(node);

    if (conn->is_tcp) {
      read_tcp_data(channel, conn, now);
    } else {
      read_udp_packets_fd(channel, conn, now);
    }

    return;
  }

  /* There is no good way to iterate across an fd_set, instead we must pull a
   * list of all known fds, and iterate across that checking against the fd_set.
   * On allocation failure the list is NULL with a count of 0, so the loop
   * below simply does nothing.
   */
  socketlist = channel_socket_list(channel, &num_sockets);

  for (i = 0; i < num_sockets; i++) {
    if (!FD_ISSET(socketlist[i], read_fds)) {
      continue;
    }

    /* If there's an error and we close this socket, then open
     * another with the same fd to talk to another server, then we
     * don't want to think that it was the new socket that was
     * ready. This is not disastrous, but is likely to result in
     * extra system calls and confusion. */
    FD_CLR(socketlist[i], read_fds);

    node =
      ares__htable_asvp_get_direct(channel->connnode_by_socket, socketlist[i]);
    if (node == NULL) {
      /* The snapshot in socketlist can be stale (handling an earlier socket
       * may have closed this one).  Skip it rather than returning, which
       * would leak socketlist and ignore the remaining ready sockets. */
      continue;
    }

    conn = ares__llist_node_val(node);

    if (conn->is_tcp) {
      read_tcp_data(channel, conn, now);
    } else {
      read_udp_packets_fd(channel, conn, now);
    }
  }

  ares_free(socketlist);
}
544 
545 /* If any queries have timed out, note the timeout and move them on. */
process_timeouts(ares_channel_t * channel,struct timeval * now)546 static void process_timeouts(ares_channel_t *channel, struct timeval *now)
547 {
548   ares__slist_node_t *node =
549     ares__slist_node_first(channel->queries_by_timeout);
550   while (node != NULL) {
551     struct query             *query = ares__slist_node_val(node);
552     /* Node might be removed, cache next */
553     ares__slist_node_t       *next = ares__slist_node_next(node);
554     struct server_connection *conn;
555     /* Since this is sorted, as soon as we hit a query that isn't timed out,
556      * break */
557     if (!ares__timedout(now, &query->timeout)) {
558       break;
559     }
560 
561     query->error_status = ARES_ETIMEOUT;
562     query->timeouts++;
563 
564     conn = query->conn;
565     server_increment_failures(conn->server);
566     ares__requeue_query(query, now);
567     ares__check_cleanup_conn(channel, conn);
568 
569     node = next;
570   }
571 }
572 
rewrite_without_edns(ares_dns_record_t * qdnsrec,struct query * query)573 static ares_status_t rewrite_without_edns(ares_dns_record_t *qdnsrec,
574                                           struct query      *query)
575 {
576   ares_status_t  status;
577   size_t         i;
578   ares_bool_t    found_opt_rr = ARES_FALSE;
579   unsigned char *msg          = NULL;
580   size_t         msglen       = 0;
581 
582   /* Find and remove the OPT RR record */
583   for (i = 0; i < ares_dns_record_rr_cnt(qdnsrec, ARES_SECTION_ADDITIONAL);
584        i++) {
585     const ares_dns_rr_t *rr;
586     rr = ares_dns_record_rr_get(qdnsrec, ARES_SECTION_ADDITIONAL, i);
587     if (ares_dns_rr_get_type(rr) == ARES_REC_TYPE_OPT) {
588       ares_dns_record_rr_del(qdnsrec, ARES_SECTION_ADDITIONAL, i);
589       found_opt_rr = ARES_TRUE;
590       break;
591     }
592   }
593 
594   if (!found_opt_rr) {
595     status = ARES_EFORMERR;
596     goto done;
597   }
598 
599   /* Rewrite the DNS message */
600   status = ares_dns_write(qdnsrec, &msg, &msglen);
601   if (status != ARES_SUCCESS) {
602     goto done;
603   }
604 
605   ares_free(query->qbuf);
606   query->qbuf = msg;
607   query->qlen = msglen;
608 
609 done:
610   return status;
611 }
612 
613 /* Handle an answer from a server. This must NEVER cleanup the
614  * server connection! Return something other than ARES_SUCCESS to cause
615  * the connection to be terminated after this call. */
process_answer(ares_channel_t * channel,const unsigned char * abuf,size_t alen,struct server_connection * conn,ares_bool_t tcp,struct timeval * now)616 static ares_status_t process_answer(ares_channel_t      *channel,
617                                     const unsigned char *abuf, size_t alen,
618                                     struct server_connection *conn,
619                                     ares_bool_t tcp, struct timeval *now)
620 {
621   struct query        *query;
622   /* Cache these as once ares__send_query() gets called, it may end up
623    * invalidating the connection all-together */
624   struct server_state *server  = conn->server;
625   ares_dns_record_t   *rdnsrec = NULL;
626   ares_dns_record_t   *qdnsrec = NULL;
627   ares_status_t        status;
628 
629   /* Parse the response */
630   status = ares_dns_parse(abuf, alen, 0, &rdnsrec);
631   if (status != ARES_SUCCESS) {
632     /* Malformations are never accepted */
633     status = ARES_EBADRESP;
634     goto cleanup;
635   }
636 
637   /* Find the query corresponding to this packet. The queries are
638    * hashed/bucketed by query id, so this lookup should be quick.
639    */
640   query = ares__htable_szvp_get_direct(channel->queries_by_qid,
641                                        ares_dns_record_get_id(rdnsrec));
642   if (!query) {
643     /* We may have stopped listening for this query, that's ok */
644     status = ARES_SUCCESS;
645     goto cleanup;
646   }
647 
648   /* Parse the question we sent as we use it to compare */
649   status = ares_dns_parse(query->qbuf, query->qlen, 0, &qdnsrec);
650   if (status != ARES_SUCCESS) {
651     end_query(channel, query, status, NULL, 0);
652     goto cleanup;
653   }
654 
655   /* Both the query id and the questions must be the same. We will drop any
656    * replies that aren't for the same query as this is considered invalid. */
657   if (!same_questions(qdnsrec, rdnsrec)) {
658     /* Possible qid conflict due to delayed response, that's ok */
659     status = ARES_SUCCESS;
660     goto cleanup;
661   }
662 
663   /* At this point we know we've received an answer for this query, so we should
664    * remove it from the connection's queue so we can possibly invalidate the
665    * connection. Delay cleaning up the connection though as we may enqueue
666    * something new.  */
667   ares__llist_node_destroy(query->node_queries_to_conn);
668   query->node_queries_to_conn = NULL;
669 
670   /* If we use EDNS and server answers with FORMERR without an OPT RR, the
671    * protocol extension is not understood by the responder. We must retry the
672    * query without EDNS enabled. */
673   if (ares_dns_record_get_rcode(rdnsrec) == ARES_RCODE_FORMERR &&
674       ares_dns_has_opt_rr(qdnsrec) && !ares_dns_has_opt_rr(rdnsrec)) {
675     status = rewrite_without_edns(qdnsrec, query);
676     if (status != ARES_SUCCESS) {
677       end_query(channel, query, status, NULL, 0);
678       goto cleanup;
679     }
680 
681     ares__send_query(query, now);
682     status = ARES_SUCCESS;
683     goto cleanup;
684   }
685 
686   /* If we got a truncated UDP packet and are not ignoring truncation,
687    * don't accept the packet, and switch the query to TCP if we hadn't
688    * done so already.
689    */
690   if (ares_dns_record_get_flags(rdnsrec) & ARES_FLAG_TC && !tcp &&
691       !(channel->flags & ARES_FLAG_IGNTC)) {
692     query->using_tcp = ARES_TRUE;
693     ares__send_query(query, now);
694     status = ARES_SUCCESS; /* Switched to TCP is ok */
695     goto cleanup;
696   }
697 
698   /* If we aren't passing through all error packets, discard packets
699    * with SERVFAIL, NOTIMP, or REFUSED response codes.
700    */
701   if (!(channel->flags & ARES_FLAG_NOCHECKRESP)) {
702     ares_dns_rcode_t rcode = ares_dns_record_get_rcode(rdnsrec);
703     if (rcode == ARES_RCODE_SERVFAIL || rcode == ARES_RCODE_NOTIMP ||
704         rcode == ARES_RCODE_REFUSED) {
705       switch (rcode) {
706         case ARES_RCODE_SERVFAIL:
707           query->error_status = ARES_ESERVFAIL;
708           break;
709         case ARES_RCODE_NOTIMP:
710           query->error_status = ARES_ENOTIMP;
711           break;
712         case ARES_RCODE_REFUSED:
713           query->error_status = ARES_EREFUSED;
714           break;
715         default:
716           break;
717       }
718 
719       server_increment_failures(server);
720       ares__requeue_query(query, now);
721 
722       /* Should any of these cause a connection termination?
723        * Maybe SERVER_FAILURE? */
724       status = ARES_SUCCESS;
725       goto cleanup;
726     }
727   }
728 
729   /* If cache insertion was successful, it took ownership.  We ignore
730    * other cache insertion failures. */
731   if (ares_qcache_insert(channel, now, query, rdnsrec) == ARES_SUCCESS) {
732     rdnsrec = NULL;
733   }
734 
735   server_set_good(server);
736   end_query(channel, query, ARES_SUCCESS, abuf, alen);
737 
738   status = ARES_SUCCESS;
739 
740 cleanup:
741   ares_dns_record_destroy(rdnsrec);
742   ares_dns_record_destroy(qdnsrec);
743   return status;
744 }
745 
/* Close a connection after an error.
 *
 * critical_failure: when true, the failure is first counted against the
 *                   connection's server.
 */
static void handle_conn_error(struct server_connection *conn,
                              ares_bool_t               critical_failure)
{
  /* Count the failure before closing, so that the requeue performed by the
   * close is unlikely to land on the same server again */
  if (critical_failure) {
    server_increment_failures(conn->server);
  }

  /* Closing takes care of requeueing automatically */
  ares__close_connection(conn);
}
760 
ares__requeue_query(struct query * query,struct timeval * now)761 ares_status_t ares__requeue_query(struct query *query, struct timeval *now)
762 {
763   ares_channel_t *channel = query->channel;
764   size_t max_tries        = ares__slist_len(channel->servers) * channel->tries;
765 
766   query->try_count++;
767 
768   if (query->try_count < max_tries && !query->no_retries) {
769     return ares__send_query(query, now);
770   }
771 
772   /* If we are here, all attempts to perform query failed. */
773   if (query->error_status == ARES_SUCCESS) {
774     query->error_status = ARES_ETIMEOUT;
775   }
776 
777   end_query(channel, query, query->error_status, NULL, 0);
778   return ARES_ETIMEOUT;
779 }
780 
781 /* Pick a random server from the list, we first get a random number in the
782  * range of the number of servers, then scan until we find that server in
783  * the list */
ares__random_server(ares_channel_t * channel)784 static struct server_state *ares__random_server(ares_channel_t *channel)
785 {
786   unsigned char       c;
787   size_t              cnt;
788   size_t              idx;
789   ares__slist_node_t *node;
790   size_t              num_servers = ares__slist_len(channel->servers);
791 
792   /* Silence coverity, not possible */
793   if (num_servers == 0) {
794     return NULL;
795   }
796 
797   ares__rand_bytes(channel->rand_state, &c, 1);
798 
799   cnt = c;
800   idx = cnt % num_servers;
801 
802   cnt = 0;
803   for (node = ares__slist_node_first(channel->servers); node != NULL;
804        node = ares__slist_node_next(node)) {
805     if (cnt == idx) {
806       return ares__slist_node_val(node);
807     }
808 
809     cnt++;
810   }
811 
812   return NULL;
813 }
814 
ares__append_tcpbuf(struct server_state * server,const struct query * query)815 static ares_status_t ares__append_tcpbuf(struct server_state *server,
816                                          const struct query  *query)
817 {
818   ares_status_t status;
819 
820   status = ares__buf_append_be16(server->tcp_send, (unsigned short)query->qlen);
821   if (status != ARES_SUCCESS) {
822     return status;
823   }
824   return ares__buf_append(server->tcp_send, query->qbuf, query->qlen);
825 }
826 
ares__calc_query_timeout(const struct query * query)827 static size_t ares__calc_query_timeout(const struct query *query)
828 {
829   const ares_channel_t *channel  = query->channel;
830   size_t                timeplus = channel->timeout;
831   size_t                rounds;
832   size_t                num_servers = ares__slist_len(channel->servers);
833 
834   if (num_servers == 0) {
835     return 0;
836   }
837 
838   /* For each trip through the entire server list, we want to double the
839    * retry from the last retry */
840   rounds = (query->try_count / num_servers);
841   if (rounds > 0) {
842     timeplus <<= rounds;
843   }
844 
845   if (channel->maxtimeout && timeplus > channel->maxtimeout) {
846     timeplus = channel->maxtimeout;
847   }
848 
849   /* Add some jitter to the retry timeout.
850    *
851    * Jitter is needed in situation when resolve requests are performed
852    * simultaneously from multiple hosts and DNS server throttle these requests.
853    * Adding randomness allows to avoid synchronisation of retries.
854    *
855    * Value of timeplus adjusted randomly to the range [0.5 * timeplus,
856    * timeplus].
857    */
858   if (rounds > 0) {
859     unsigned short r;
860     float          delta_multiplier;
861 
862     ares__rand_bytes(channel->rand_state, (unsigned char *)&r, sizeof(r));
863     delta_multiplier  = ((float)r / USHRT_MAX) * 0.5f;
864     timeplus         -= (size_t)((float)timeplus * delta_multiplier);
865   }
866 
867   /* We want explicitly guarantee that timeplus is greater or equal to timeout
868    * specified in channel options. */
869   if (timeplus < channel->timeout) {
870     timeplus = channel->timeout;
871   }
872 
873   return timeplus;
874 }
875 
/* Hand a query to a server and register it for timeout/connection tracking.
 *
 * A server is chosen (ares__random_server() when channel->rotate is set,
 * otherwise the first entry of channel->servers).  For TCP queries the
 * request is appended to the server's TCP send buffer, opening the TCP
 * connection first if the server has none.  For UDP queries the request is
 * written directly to a UDP connection of that server.
 *
 * Returns:
 *   ARES_SUCCESS    the query was sent/queued and attached to a connection
 *   ARES_ENOSERVER  no server was available
 *   ARES_ENOMEM     inserting into one of the tracking lists failed
 *   otherwise       the status of the failing step, or whatever
 *                   ares__requeue_query() returned when the query was requeued
 *
 * Whenever end_query() is invoked on a failure path the query has been freed
 * and must not be used afterwards.
 */
ares_status_t ares__send_query(struct query *query, struct timeval *now)
{
  ares_channel_t           *channel     = query->channel;
  struct server_state      *server      = NULL;
  struct server_connection *conn        = NULL;
  ares_bool_t               opened_here = ARES_FALSE;
  ares_status_t             status;
  size_t                    delay;

  query->conn = NULL;

  /* Select the target server */
  server = channel->rotate ? ares__random_server(channel)
                           : ares__slist_first_val(channel->servers);

  if (server == NULL) {
    end_query(channel, query, ARES_ENOSERVER /* ? */, NULL, 0);
    return ARES_ENOSERVER;
  }

  if (query->using_tcp) {
    size_t queued_before;

    /* The server keeps a single TCP connection; create it on demand. */
    if (server->tcp_conn == NULL) {
      opened_here = ARES_TRUE;
      status      = ares__open_connection(channel, server, ARES_TRUE);

      /* Server-specific failures: count them and retry the query */
      if (status == ARES_ECONNREFUSED || status == ARES_EBADFAMILY) {
        server_increment_failures(server);
        query->error_status = status;
        return ares__requeue_query(query, now);
      }

      /* Any other failure (likely ENOMEM) ends the query */
      if (status != ARES_SUCCESS) {
        end_query(channel, query, status, NULL, 0);
        return status;
      }
    }

    conn          = server->tcp_conn;
    queued_before = ares__buf_len(server->tcp_send);

    status = ares__append_tcpbuf(server, query);
    if (status != ARES_SUCCESS) {
      end_query(channel, query, status, NULL, 0);

      /* A connection opened by this call is ours to tear down; a
       * pre-existing one is left for later cleanup elsewhere */
      if (opened_here) {
        ares__close_connection(conn);
      }
      return status;
    }

    /* The send buffer was empty before this request was appended, so issue
     * the socket state callback with both flags set */
    if (queued_before == 0) {
      SOCK_STATE_CALLBACK(channel, conn->fd, 1, 1);
    }
  } else {
    ares__llist_node_t *head = ares__llist_node_first(server->connections);

    /* The head connection is not usable when it is the TCP connection or
     * when it already carried the configured maximum number of UDP queries
     * (a limit of 0 disables that check). */
    if (head != NULL) {
      conn = ares__llist_node_val(head);
      if (conn->is_tcp || (channel->udp_max_queries > 0 &&
                           conn->total_queries >= channel->udp_max_queries)) {
        head = NULL;
      }
    }

    if (head == NULL) {
      opened_here = ARES_TRUE;
      status      = ares__open_connection(channel, server, ARES_FALSE);

      /* Server-specific failures: count them and retry the query */
      if (status == ARES_ECONNREFUSED || status == ARES_EBADFAMILY) {
        server_increment_failures(server);
        query->error_status = status;
        return ares__requeue_query(query, now);
      }

      /* Any other failure (likely ENOMEM) ends the query */
      if (status != ARES_SUCCESS) {
        end_query(channel, query, status, NULL, 0);
        return status;
      }

      /* Re-read the list head, presumably the connection just opened */
      head = ares__llist_node_first(server->connections);
    }

    conn = ares__llist_node_val(head);

    if (ares__socket_write(channel, conn->fd, query->qbuf, query->qlen) == -1) {
      /* FIXME: EAGAIN likely can happen here and is not handled specially. */
      server_increment_failures(server);
      status = ares__requeue_query(query, now);

      /* A connection opened by this call is ours to tear down; a
       * pre-existing one is left for later cleanup elsewhere */
      if (opened_here) {
        ares__close_connection(conn);
      }

      return status;
    }
  }

  /* (Re)insert the query into the timeout-ordered list so expirations can be
   * processed quickly. */
  delay = ares__calc_query_timeout(query);
  ares__slist_node_destroy(query->node_queries_by_timeout);
  query->timeout = *now;
  timeadd(&query->timeout, delay);
  query->node_queries_by_timeout =
    ares__slist_insert(channel->queries_by_timeout, query);
  if (query->node_queries_by_timeout == NULL) {
    goto fail_nomem;
  }

  /* (Re)link the query to the connection it travels on so connection errors
   * can be mapped back to their queries quickly. */
  ares__llist_node_destroy(query->node_queries_to_conn);
  query->node_queries_to_conn =
    ares__llist_insert_last(conn->queries_to_conn, query);
  if (query->node_queries_to_conn == NULL) {
    goto fail_nomem;
  }

  query->conn = conn;
  conn->total_queries++;
  return ARES_SUCCESS;

fail_nomem:
  end_query(channel, query, ARES_ENOMEM, NULL, 0);

  /* A connection opened by this call is ours to tear down; a pre-existing
   * one is left for later cleanup elsewhere */
  if (opened_here) {
    ares__close_connection(conn);
  }
  return ARES_ENOMEM;
}
1043 
same_questions(const ares_dns_record_t * qrec,const ares_dns_record_t * arec)1044 static ares_bool_t same_questions(const ares_dns_record_t *qrec,
1045                                   const ares_dns_record_t *arec)
1046 {
1047   size_t      i;
1048   ares_bool_t rv = ARES_FALSE;
1049 
1050 
1051   if (ares_dns_record_query_cnt(qrec) != ares_dns_record_query_cnt(arec)) {
1052     goto done;
1053   }
1054 
1055   for (i = 0; i < ares_dns_record_query_cnt(qrec); i++) {
1056     const char         *qname = NULL;
1057     const char         *aname = NULL;
1058     ares_dns_rec_type_t qtype;
1059     ares_dns_rec_type_t atype;
1060     ares_dns_class_t    qclass;
1061     ares_dns_class_t    aclass;
1062 
1063     if (ares_dns_record_query_get(qrec, i, &qname, &qtype, &qclass) !=
1064           ARES_SUCCESS ||
1065         qname == NULL) {
1066       goto done;
1067     }
1068 
1069     if (ares_dns_record_query_get(arec, i, &aname, &atype, &aclass) !=
1070           ARES_SUCCESS ||
1071         aname == NULL) {
1072       goto done;
1073     }
1074     if (strcasecmp(qname, aname) != 0 || qtype != atype || qclass != aclass) {
1075       goto done;
1076     }
1077   }
1078 
1079   rv = ARES_TRUE;
1080 
1081 done:
1082   return rv;
1083 }
1084 
same_address(const struct sockaddr * sa,const struct ares_addr * aa)1085 static ares_bool_t same_address(const struct sockaddr  *sa,
1086                                 const struct ares_addr *aa)
1087 {
1088   const void *addr1;
1089   const void *addr2;
1090 
1091   if (sa->sa_family == aa->family) {
1092     switch (aa->family) {
1093       case AF_INET:
1094         addr1 = &aa->addr.addr4;
1095         addr2 = &(CARES_INADDR_CAST(struct sockaddr_in *, sa))->sin_addr;
1096         if (memcmp(addr1, addr2, sizeof(aa->addr.addr4)) == 0) {
1097           return ARES_TRUE; /* match */
1098         }
1099         break;
1100       case AF_INET6:
1101         addr1 = &aa->addr.addr6;
1102         addr2 = &(CARES_INADDR_CAST(struct sockaddr_in6 *, sa))->sin6_addr;
1103         if (memcmp(addr1, addr2, sizeof(aa->addr.addr6)) == 0) {
1104           return ARES_TRUE; /* match */
1105         }
1106         break;
1107       default:
1108         break; /* LCOV_EXCL_LINE */
1109     }
1110   }
1111   return ARES_FALSE; /* different */
1112 }
1113 
ares_detach_query(struct query * query)1114 static void ares_detach_query(struct query *query)
1115 {
1116   /* Remove the query from all the lists in which it is linked */
1117   ares__htable_szvp_remove(query->channel->queries_by_qid, query->qid);
1118   ares__slist_node_destroy(query->node_queries_by_timeout);
1119   ares__llist_node_destroy(query->node_queries_to_conn);
1120   ares__llist_node_destroy(query->node_all_queries);
1121   query->node_queries_by_timeout = NULL;
1122   query->node_queries_to_conn    = NULL;
1123   query->node_all_queries        = NULL;
1124 }
1125 
/* Finish a query: report the result to the user's callback, free the query,
 * then notify the channel in case its queue is now empty.
 *
 * status - result code passed to the callback
 * abuf   - answer buffer handed to the callback (may be NULL)
 * alen   - length of abuf
 *
 * The query must not be used after this call returns.
 */
static void end_query(ares_channel_t *channel, struct query *query,
                      ares_status_t status, const unsigned char *abuf,
                      size_t alen)
{
  /* The callback prototype takes a non-const buffer for historical reasons
   * even though the buffer is not meant to be modified, so strip the
   * qualifier here for compatibility. */
  unsigned char *answer = (unsigned char *)((void *)((size_t)abuf));

  query->callback(query->arg, (int)status, (int)query->timeouts, answer,
                  (int)alen);
  ares__free_query(query);

  /* The empty-queue check has to run last:
   *  1) the callback above may itself have enqueued a new query, and
   *  2) the current query only stops counting once it has been freed.
   */
  ares_queue_notify_empty(channel);
}
1144 
ares__free_query(struct query * query)1145 void ares__free_query(struct query *query)
1146 {
1147   ares_detach_query(query);
1148   /* Zero out some important stuff, to help catch bugs */
1149   query->callback = NULL;
1150   query->arg      = NULL;
1151   /* Deallocate the memory associated with the query */
1152   ares_free(query->qbuf);
1153 
1154   ares_free(query);
1155 }
1156