1
2 /* Copyright 1998 by the Massachusetts Institute of Technology.
3 * Copyright (C) 2004-2017 by Daniel Stenberg
4 *
5 * Permission to use, copy, modify, and distribute this
6 * software and its documentation for any purpose and without
7 * fee is hereby granted, provided that the above copyright
8 * notice appear in all copies and that both that copyright
9 * notice and this permission notice appear in supporting
10 * documentation, and that the name of M.I.T. not be used in
11 * advertising or publicity pertaining to distribution of the
12 * software without specific, written prior permission.
13 * M.I.T. makes no representations about the suitability of
14 * this software for any purpose. It is provided "as is"
15 * without express or implied warranty.
16 */
17
18 #include "ares_setup.h"
19
20 #ifdef HAVE_SYS_UIO_H
21 # include <sys/uio.h>
22 #endif
23 #ifdef HAVE_NETINET_IN_H
24 # include <netinet/in.h>
25 #endif
26 #ifdef HAVE_NETINET_TCP_H
27 # include <netinet/tcp.h>
28 #endif
29 #ifdef HAVE_NETDB_H
30 # include <netdb.h>
31 #endif
32 #ifdef HAVE_ARPA_INET_H
33 # include <arpa/inet.h>
34 #endif
35
36 #include "ares_nameser.h"
37
38 #ifdef HAVE_STRINGS_H
39 # include <strings.h>
40 #endif
41 #ifdef HAVE_SYS_IOCTL_H
42 # include <sys/ioctl.h>
43 #endif
44 #ifdef NETWARE
45 # include <sys/filio.h>
46 #endif
47
48 #include <assert.h>
49 #include <fcntl.h>
50 #include <limits.h>
51
52 #include "ares.h"
53 #include "ares_dns.h"
54 #include "ares_nowarn.h"
55 #include "ares_private.h"
56
57
58 static int try_again(int errnum);
59 static void write_tcp_data(ares_channel channel, fd_set *write_fds,
60 ares_socket_t write_fd, struct timeval *now);
61 static void read_tcp_data(ares_channel channel, fd_set *read_fds,
62 ares_socket_t read_fd, struct timeval *now);
63 static void read_udp_packets(ares_channel channel, fd_set *read_fds,
64 ares_socket_t read_fd, struct timeval *now);
65 static void advance_tcp_send_queue(ares_channel channel, int whichserver,
66 ares_ssize_t num_bytes);
67 static void process_timeouts(ares_channel channel, struct timeval *now);
68 static void process_broken_connections(ares_channel channel,
69 struct timeval *now);
70 static void process_answer(ares_channel channel, unsigned char *abuf,
71 int alen, int whichserver, int tcp,
72 struct timeval *now);
73 static void handle_error(ares_channel channel, int whichserver,
74 struct timeval *now);
75 static void skip_server(ares_channel channel, struct query *query,
76 int whichserver);
77 static void next_server(ares_channel channel, struct query *query,
78 struct timeval *now);
79 static int open_tcp_socket(ares_channel channel, struct server_state *server);
80 static int open_udp_socket(ares_channel channel, struct server_state *server);
81 static int same_questions(const unsigned char *qbuf, int qlen,
82 const unsigned char *abuf, int alen);
83 static int same_address(struct sockaddr *sa, struct ares_addr *aa);
84 static int has_opt_rr(const unsigned char *abuf, int alen);
85 static void end_query(ares_channel channel, struct query *query, int status,
86 unsigned char *abuf, int alen);
87
88 /* return true if now is exactly check time or later */
/* Return nonzero if "now" is at or past the "check" time, i.e. the
 * deadline has been reached; zero if the deadline is still in the
 * future.  Comparison is done on whole seconds first, falling back to
 * the microsecond fields only when the seconds are equal. */
int ares__timedout(struct timeval *now,
                   struct timeval *check)
{
  long sec_diff = now->tv_sec - check->tv_sec;

  if (sec_diff != 0)
    return (sec_diff > 0);   /* decided purely by the seconds */

  /* Identical whole seconds: the sub-second parts break the tie. */
  return (now->tv_usec >= check->tv_usec);
}
102
103 /* add the specific number of milliseconds to the time in the first argument */
/* Advance *now by the given number of milliseconds, keeping tv_usec
 * normalized to [0, 1000000). */
static void timeadd(struct timeval *now, int millisecs)
{
  now->tv_sec  += millisecs / 1000;
  now->tv_usec += (millisecs % 1000) * 1000;

  /* One carry is sufficient: a normalized tv_usec (< 1000000) plus a
   * sub-second addition (< 1000000) is always below 2000000. */
  if (now->tv_usec >= 1000000)
    {
      now->tv_sec++;
      now->tv_usec -= 1000000;
    }
}
114
115 /*
116 * generic process function
117 */
/*
 * generic process function
 *
 * Shared driver for ares_process() and ares_process_fd().  The caller
 * identifies ready sockets either via the fd_set pair or via the single
 * explicit descriptors; the unused form is NULL / ARES_SOCKET_BAD.
 * A single "now" snapshot is taken up front so every pass below sees a
 * consistent notion of the current time.
 */
static void processfds(ares_channel channel,
                       fd_set *read_fds, ares_socket_t read_fd,
                       fd_set *write_fds, ares_socket_t write_fd)
{
  struct timeval now = ares__tvnow();

  write_tcp_data(channel, write_fds, write_fd, &now);   /* flush queued TCP sends */
  read_tcp_data(channel, read_fds, read_fd, &now);      /* drain TCP replies */
  read_udp_packets(channel, read_fds, read_fd, &now);   /* drain UDP replies */
  process_timeouts(channel, &now);                      /* expire overdue queries */
  process_broken_connections(channel, &now);            /* tear down flagged servers */
}
130
131 /* Something interesting happened on the wire, or there was a timeout.
132 * See what's up and respond accordingly.
133 */
/* Something interesting happened on the wire, or there was a timeout.
 * See what's up and respond accordingly.
 *
 * fd_set-based public entry point: the caller passes the read/write
 * sets returned by select().  The explicit-fd parameters are disabled
 * by passing ARES_SOCKET_BAD.
 */
void ares_process(ares_channel channel, fd_set *read_fds, fd_set *write_fds)
{
  processfds(channel, read_fds, ARES_SOCKET_BAD, write_fds, ARES_SOCKET_BAD);
}
138
139 /* Something interesting happened on the wire, or there was a timeout.
140 * See what's up and respond accordingly.
141 */
/* Something interesting happened on the wire, or there was a timeout.
 * See what's up and respond accordingly.
 *
 * Single-descriptor public entry point, for event loops that report
 * one ready fd at a time (epoll/kqueue style) rather than fd_sets.
 */
void ares_process_fd(ares_channel channel,
                     ares_socket_t read_fd, /* use ARES_SOCKET_BAD or valid
                                               file descriptors */
                     ares_socket_t write_fd)
{
  processfds(channel, NULL, read_fd, NULL, write_fd);
}
149
150
151 /* Return 1 if the specified error number describes a readiness error, or 0
152 * otherwise. This is mostly for HP-UX, which could return EAGAIN or
153 * EWOULDBLOCK. See this man page
154 *
155 * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?
156 * manpage=/usr/share/man/man2.Z/send.2
157 */
/* Return 1 if the given error number only means "not ready, retry
 * later", or 0 for a genuine failure.  Checked as plain if-tests
 * rather than a switch so both names can be handled even on platforms
 * where EWOULDBLOCK and EAGAIN share a value.  This is mostly for
 * HP-UX, which could return either EAGAIN or EWOULDBLOCK from send().
 */
static int try_again(int errnum)
{
#if !defined EWOULDBLOCK && !defined EAGAIN
#error "Neither EWOULDBLOCK nor EAGAIN defined"
#endif

#ifdef EWOULDBLOCK
  if (errnum == EWOULDBLOCK)
    return 1;
#endif
#if defined EAGAIN && EAGAIN != EWOULDBLOCK
  if (errnum == EAGAIN)
    return 1;
#endif

  return 0;
}
176
/* Gathered write of "len" iovecs to socket s.  Dispatches to the
 * user-installed socket functions when the channel has them, otherwise
 * to the plain writev() syscall.  Returns the byte count written or a
 * negative value on error, per writev() semantics. */
static ares_ssize_t socket_writev(ares_channel channel, ares_socket_t s, const struct iovec * vec, int len)
{
  if (channel->sock_funcs)
    return channel->sock_funcs->asendv(s, vec, len, channel->sock_func_cb_data);

  return writev(s, vec, len);
}
184
/* Write a single buffer to socket s.  When user socket functions are
 * installed, the buffer is wrapped in a one-element iovec so the same
 * asendv callback covers both this and socket_writev(); otherwise the
 * swrite() portability wrapper is used. */
static ares_ssize_t socket_write(ares_channel channel, ares_socket_t s, const void * data, size_t len)
{
  if (channel->sock_funcs)
    {
      struct iovec vec;
      /* cast away const: iov_base is non-const but asendv only reads it
       * here -- TODO confirm against the callback contract */
      vec.iov_base = (void*)data;
      vec.iov_len = len;
      return channel->sock_funcs->asendv(s, &vec, 1, channel->sock_func_cb_data);
    }
  return swrite(s, data, len);
}
196
197 /* If any TCP sockets select true for writing, write out queued data
198 * we have for them.
199 */
/* If any TCP sockets select true for writing, write out queued data
 * we have for them.
 *
 * For each writable server, the whole send queue (up to the platform
 * iovec limit) is pushed in one writev(); if the iovec array cannot be
 * allocated, only the head request is sent with a plain write as a
 * degraded fallback.  Partial sends are handled by
 * advance_tcp_send_queue().
 */
static void write_tcp_data(ares_channel channel,
                           fd_set *write_fds,
                           ares_socket_t write_fd,
                           struct timeval *now)
{
  struct server_state *server;
  struct send_request *sendreq;
  struct iovec *vec;
  int i;
  ares_ssize_t scount;
  ares_ssize_t wcount;
  size_t n;
  /* From writev manpage: An implementation can advertise its limit by defining
     IOV_MAX in <limits.h> or at run time via the return value from
     sysconf(_SC_IOV_MAX). On modern Linux systems, the limit is 1024. Back in
     Linux 2.0 days, this limit was 16. */
#if defined(IOV_MAX)
  const size_t maxn = IOV_MAX;   /* FreeBSD */
#elif defined(_SC_IOV_MAX)
  const size_t maxn = sysconf(_SC_IOV_MAX);   /* Linux */
#else
  const size_t maxn = 16;   /* Safe default */
#endif

  if(!write_fds && (write_fd == ARES_SOCKET_BAD))
    /* no possible action */
    return;

  for (i = 0; i < channel->nservers; i++)
    {
      /* Make sure server has data to send and is selected in write_fds or
         write_fd. */
      server = &channel->servers[i];
      if (!server->qhead || server->tcp_socket == ARES_SOCKET_BAD ||
          server->is_broken)
        continue;

      if(write_fds) {
        if(!FD_ISSET(server->tcp_socket, write_fds))
          continue;
      }
      else {
        if(server->tcp_socket != write_fd)
          continue;
      }

      if(write_fds)
        /* If there's an error and we close this socket, then open
         * another with the same fd to talk to another server, then we
         * don't want to think that it was the new socket that was
         * ready. This is not disastrous, but is likely to result in
         * extra system calls and confusion. */
        FD_CLR(server->tcp_socket, write_fds);

      /* Count the number of send queue items. */
      n = 0;
      for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
        n++;

      /* Allocate iovecs so we can send all our data at once. */
      vec = ares_malloc(n * sizeof(struct iovec));
      if (vec)
        {
          /* Fill in the iovecs and send. */
          n = 0;
          for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
            {
              vec[n].iov_base = (char *) sendreq->data;
              vec[n].iov_len = sendreq->len;
              n++;
              /* Stay within the platform's iovec-per-call limit; any
               * remainder goes out on a later writability event. */
              if(n >= maxn)
                break;
            }
          wcount = socket_writev(channel, server->tcp_socket, vec, (int)n);
          ares_free(vec);
          if (wcount < 0)
            {
              /* EAGAIN/EWOULDBLOCK just means "retry later"; anything
               * else breaks the connection. */
              if (!try_again(SOCKERRNO))
                handle_error(channel, i, now);
              continue;
            }

          /* Advance the send queue by as many bytes as we sent. */
          advance_tcp_send_queue(channel, i, wcount);
        }
      else
        {
          /* Can't allocate iovecs; just send the first request. */
          sendreq = server->qhead;

          scount = socket_write(channel, server->tcp_socket, sendreq->data, sendreq->len);
          if (scount < 0)
            {
              if (!try_again(SOCKERRNO))
                handle_error(channel, i, now);
              continue;
            }

          /* Advance the send queue by as many bytes as we sent. */
          advance_tcp_send_queue(channel, i, scount);
        }
    }
}
303
304 /* Consume the given number of bytes from the head of the TCP send queue. */
/* Consume the given number of bytes from the head of the TCP send queue.
 * Fully-sent requests are unlinked and freed; a partially-sent head
 * request simply has its data pointer/length advanced so the remainder
 * goes out on the next write. */
static void advance_tcp_send_queue(ares_channel channel, int whichserver,
                                   ares_ssize_t num_bytes)
{
  struct send_request *sendreq;
  struct server_state *server = &channel->servers[whichserver];
  while (num_bytes > 0) {
    sendreq = server->qhead;
    if ((size_t)num_bytes >= sendreq->len) {
      /* The entire head request was sent: drop it from the queue. */
      num_bytes -= sendreq->len;
      server->qhead = sendreq->next;
      if (sendreq->data_storage)
        ares_free(sendreq->data_storage);
      ares_free(sendreq);
      if (server->qhead == NULL) {
        /* Queue drained: tell the app we no longer need write events. */
        SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 0);
        server->qtail = NULL;

        /* qhead is NULL so we cannot continue this loop */
        break;
      }
    }
    else {
      /* Partial send: advance within the head request and stop. */
      sendreq->data += num_bytes;
      sendreq->len -= num_bytes;
      num_bytes = 0;
    }
  }
}
333
/* Receive a datagram on socket s, capturing the sender address in
 * *from/*from_len.  Dispatches to the user-installed socket functions
 * when present; otherwise uses recvfrom() where available, falling back
 * to a plain read (which cannot report the sender address). */
static ares_ssize_t socket_recvfrom(ares_channel channel,
                                    ares_socket_t s,
                                    void * data,
                                    size_t data_len,
                                    int flags,
                                    struct sockaddr *from,
                                    ares_socklen_t *from_len)
{
  if (channel->sock_funcs)
    return channel->sock_funcs->arecvfrom(s, data, data_len,
                                          flags, from, from_len,
                                          channel->sock_func_cb_data);

#ifdef HAVE_RECVFROM
  return recvfrom(s, data, data_len, flags, from, from_len);
#else
  return sread(s, data, data_len);
#endif
}
353
/* Receive up to data_len bytes on socket s, without caring about the
 * sender address.  User socket functions are funneled through the same
 * arecvfrom callback with NULL address arguments; the default path is
 * the sread() portability wrapper. */
static ares_ssize_t socket_recv(ares_channel channel,
                                ares_socket_t s,
                                void * data,
                                size_t data_len)
{
  if (channel->sock_funcs)
    return channel->sock_funcs->arecvfrom(s, data, data_len, 0, 0, 0,
                                          channel->sock_func_cb_data);

  return sread(s, data, data_len);
}
365
366 /* If any TCP socket selects true for reading, read some data,
367 * allocate a buffer if we finish reading the length word, and process
368 * a packet if we finish reading one.
369 */
/* If any TCP socket selects true for reading, read some data,
 * allocate a buffer if we finish reading the length word, and process
 * a packet if we finish reading one.
 *
 * DNS-over-TCP frames each message with a 2-byte big-endian length
 * prefix; the per-server state machine below first accumulates those
 * two bytes in tcp_lenbuf, then reads tcp_length bytes of payload into
 * tcp_buffer before handing the message to process_answer().
 */
static void read_tcp_data(ares_channel channel, fd_set *read_fds,
                          ares_socket_t read_fd, struct timeval *now)
{
  struct server_state *server;
  int i;
  ares_ssize_t count;

  if(!read_fds && (read_fd == ARES_SOCKET_BAD))
    /* no possible action */
    return;

  for (i = 0; i < channel->nservers; i++)
    {
      /* Make sure the server has a socket and is selected in read_fds. */
      server = &channel->servers[i];
      if (server->tcp_socket == ARES_SOCKET_BAD || server->is_broken)
        continue;

      if(read_fds) {
        if(!FD_ISSET(server->tcp_socket, read_fds))
          continue;
      }
      else {
        if(server->tcp_socket != read_fd)
          continue;
      }

      if(read_fds)
        /* If there's an error and we close this socket, then open another
         * with the same fd to talk to another server, then we don't want to
         * think that it was the new socket that was ready. This is not
         * disastrous, but is likely to result in extra system calls and
         * confusion. */
        FD_CLR(server->tcp_socket, read_fds);

      if (server->tcp_lenbuf_pos != 2)
        {
          /* We haven't yet read a length word, so read that (or
           * what's left to read of it).
           */
          count = socket_recv(channel, server->tcp_socket,
                              server->tcp_lenbuf + server->tcp_lenbuf_pos,
                              2 - server->tcp_lenbuf_pos);
          if (count <= 0)
            {
              /* 0 means the peer closed the connection; -1 is fatal
               * unless it's just a would-block readiness glitch. */
              if (!(count == -1 && try_again(SOCKERRNO)))
                handle_error(channel, i, now);
              continue;
            }

          server->tcp_lenbuf_pos += (int)count;
          if (server->tcp_lenbuf_pos == 2)
            {
              /* We finished reading the length word. Decode the
               * length and allocate a buffer for the data.
               */
              server->tcp_length = server->tcp_lenbuf[0] << 8
                | server->tcp_lenbuf[1];
              server->tcp_buffer = ares_malloc(server->tcp_length);
              if (!server->tcp_buffer) {
                handle_error(channel, i, now);
                return; /* bail out on malloc failure. TODO: make this
                           function return error codes */
              }
              server->tcp_buffer_pos = 0;
            }
        }
      else
        {
          /* Read data into the allocated buffer. */
          count = socket_recv(channel, server->tcp_socket,
                              server->tcp_buffer + server->tcp_buffer_pos,
                              server->tcp_length - server->tcp_buffer_pos);
          if (count <= 0)
            {
              if (!(count == -1 && try_again(SOCKERRNO)))
                handle_error(channel, i, now);
              continue;
            }

          server->tcp_buffer_pos += (int)count;
          if (server->tcp_buffer_pos == server->tcp_length)
            {
              /* We finished reading this answer; process it and
               * prepare to read another length word.
               */
              process_answer(channel, server->tcp_buffer, server->tcp_length,
                             i, 1, now);
              ares_free(server->tcp_buffer);
              server->tcp_buffer = NULL;
              server->tcp_lenbuf_pos = 0;
              server->tcp_buffer_pos = 0;
            }
        }
    }
}
466
467 /* If any UDP sockets select true for reading, process them. */
/* If any UDP sockets select true for reading, process them.
 *
 * Each ready socket is drained in a loop: packets are read into a
 * stack buffer and handed to process_answer() until the socket would
 * block or errors out.  When recvfrom is available, the sender address
 * is compared against the server we queried, to reject off-path
 * responses. */
static void read_udp_packets(ares_channel channel, fd_set *read_fds,
                             ares_socket_t read_fd, struct timeval *now)
{
  struct server_state *server;
  int i;
  ares_ssize_t read_len;
  unsigned char buf[MAXENDSSZ + 1];
#ifdef HAVE_RECVFROM
  ares_socklen_t fromlen;
  union {
    struct sockaddr sa;
    struct sockaddr_in sa4;
    struct sockaddr_in6 sa6;
  } from;
#endif

  if(!read_fds && (read_fd == ARES_SOCKET_BAD))
    /* no possible action */
    return;

  for (i = 0; i < channel->nservers; i++)
    {
      /* Make sure the server has a socket and is selected in read_fds. */
      server = &channel->servers[i];

      if (server->udp_socket == ARES_SOCKET_BAD || server->is_broken)
        continue;

      if(read_fds) {
        if(!FD_ISSET(server->udp_socket, read_fds))
          continue;
      }
      else {
        if(server->udp_socket != read_fd)
          continue;
      }

      if(read_fds)
        /* If there's an error and we close this socket, then open
         * another with the same fd to talk to another server, then we
         * don't want to think that it was the new socket that was
         * ready. This is not disastrous, but is likely to result in
         * extra system calls and confusion. */
        FD_CLR(server->udp_socket, read_fds);

      /* To reduce event loop overhead, read and process as many
       * packets as we can. */
      do {
        /* process_answer() may close the socket (e.g. via
         * handle_error) mid-loop, so re-check each iteration. */
        if (server->udp_socket == ARES_SOCKET_BAD) {
          read_len = -1;
        } else {
          if (server->addr.family == AF_INET) {
            fromlen = sizeof(from.sa4);
          } else {
            fromlen = sizeof(from.sa6);
          }
          read_len = socket_recvfrom(channel, server->udp_socket, (void *)buf,
                                     sizeof(buf), 0, &from.sa, &fromlen);
        }

        if (read_len == 0) {
          /* UDP is connectionless, so result code of 0 is a 0-length UDP
           * packet, and not an indication the connection is closed like on
           * tcp */
          continue;
        } else if (read_len < 0) {
          if (try_again(SOCKERRNO))
            continue;

          handle_error(channel, i, now);

#ifdef HAVE_RECVFROM
        } else if (!same_address(&from.sa, &server->addr)) {
          /* The address the response comes from does not match the address we
           * sent the request to. Someone may be attempting to perform a cache
           * poisoning attack. */
          continue;
#endif

        } else {
          process_answer(channel, buf, (int)read_len, i, 0, now);
        }
      } while (read_len >= 0);
    }
}
553
554 /* If any queries have timed out, note the timeout and move them on. */
/* If any queries have timed out, note the timeout and move them on.
 *
 * Queries live in a timing wheel (queries_by_timeout) bucketed by the
 * second they expire, modulo ARES_TIMEOUT_TABLE_SIZE.  We walk every
 * bucket for each second between the last pass and "now" and retry the
 * expired queries on their next server. */
static void process_timeouts(ares_channel channel, struct timeval *now)
{
  time_t t; /* the time of the timeouts we're processing */
  struct query *query;
  struct list_node* list_head;
  struct list_node* list_node;

  /* Process all the timeouts that have fired since the last time we processed
   * timeouts. If things are going well, then we'll have hundreds/thousands of
   * queries that fall into future buckets, and only a handful of requests
   * that fall into the "now" bucket, so this should be quite quick.
   */
  for (t = channel->last_timeout_processed; t <= now->tv_sec; t++)
    {
      list_head = &(channel->queries_by_timeout[t % ARES_TIMEOUT_TABLE_SIZE]);
      for (list_node = list_head->next; list_node != list_head; )
        {
          query = list_node->data;
          list_node = list_node->next; /* in case the query gets deleted */
          /* tv_sec == 0 is used as "no timeout armed" -- TODO confirm
           * against where query->timeout is assigned */
          if (query->timeout.tv_sec && ares__timedout(now, &query->timeout))
            {
              query->error_status = ARES_ETIMEOUT;
              ++query->timeouts;
              next_server(channel, query, now);
            }
        }
    }
  channel->last_timeout_processed = now->tv_sec;
}
584
585 /* Handle an answer from a server. */
/* Handle an answer from a server.
 *
 * abuf/alen is the raw DNS response, whichserver the index of the
 * server that produced it, and tcp nonzero when it arrived over the
 * TCP connection.  Matches the packet to an outstanding query, deals
 * with EDNS fallback and UDP truncation, and completes or retries the
 * query as appropriate.
 *
 * Fix vs. original: the ares_realloc() result in the EDNS-fallback
 * path was assigned to query->tcpbuf unchecked; a failed realloc would
 * have replaced the buffer with NULL and the very next lines would
 * dereference it.  Since the realloc only shrinks the buffer, keeping
 * the original allocation on failure is safe.
 */
static void process_answer(ares_channel channel, unsigned char *abuf,
                           int alen, int whichserver, int tcp,
                           struct timeval *now)
{
  int tc, rcode, packetsz;
  unsigned short id;
  struct query *query;
  struct list_node* list_head;
  struct list_node* list_node;

  /* If there's no room in the answer for a header, we can't do much
   * with it. */
  if (alen < HFIXEDSZ)
    return;

  /* Grab the query ID, truncate bit, and response code from the packet. */
  id = DNS_HEADER_QID(abuf);
  tc = DNS_HEADER_TC(abuf);
  rcode = DNS_HEADER_RCODE(abuf);

  /* Find the query corresponding to this packet. The queries are
   * hashed/bucketed by query id, so this lookup should be quick. Note that
   * both the query id and the questions must be the same; when the query id
   * wraps around we can have multiple outstanding queries with the same query
   * id, so we need to check both the id and question.
   */
  query = NULL;
  list_head = &(channel->queries_by_qid[id % ARES_QID_TABLE_SIZE]);
  for (list_node = list_head->next; list_node != list_head;
       list_node = list_node->next)
    {
      struct query *q = list_node->data;
      if ((q->qid == id) && same_questions(q->qbuf, q->qlen, abuf, alen))
        {
          query = q;
          break;
        }
    }
  if (!query)
    return;

  packetsz = PACKETSZ;
  /* If we use EDNS and server answers with FORMERR without an OPT RR, the
   * protocol extension is not understood by the responder. We must retry the
   * query without EDNS enabled. */
  if (channel->flags & ARES_FLAG_EDNS)
    {
      packetsz = channel->ednspsz;
      if (rcode == FORMERR && has_opt_rr(abuf, alen) != 1)
        {
          int qlen = (query->tcplen - 2) - EDNSFIXEDSZ;
          unsigned char *newbuf;
          channel->flags ^= ARES_FLAG_EDNS;
          /* Strip the OPT RR: shorten the logical lengths, rewrite the
           * TCP length prefix, and zero the additional-record count. */
          query->tcplen -= EDNSFIXEDSZ;
          query->qlen -= EDNSFIXEDSZ;
          query->tcpbuf[0] = (unsigned char)((qlen >> 8) & 0xff);
          query->tcpbuf[1] = (unsigned char)(qlen & 0xff);
          DNS_HEADER_SET_ARCOUNT(query->tcpbuf + 2, 0);
          /* Shrink the buffer to its new length.  On realloc failure,
           * keep the original (larger) buffer instead of storing NULL;
           * the length fields above already describe the valid data. */
          newbuf = ares_realloc(query->tcpbuf, query->tcplen);
          if (newbuf)
            query->tcpbuf = newbuf;
          query->qbuf = query->tcpbuf + 2;
          ares__send_query(channel, query, now);
          return;
        }
    }

  /* If we got a truncated UDP packet and are not ignoring truncation,
   * don't accept the packet, and switch the query to TCP if we hadn't
   * done so already.
   */
  if ((tc || alen > packetsz) && !tcp && !(channel->flags & ARES_FLAG_IGNTC))
    {
      if (!query->using_tcp)
        {
          query->using_tcp = 1;
          ares__send_query(channel, query, now);
        }
      return;
    }

  /* Limit alen to PACKETSZ if we aren't using TCP (only relevant if we
   * are ignoring truncation.
   */
  if (alen > packetsz && !tcp)
    alen = packetsz;

  /* If we aren't passing through all error packets, discard packets
   * with SERVFAIL, NOTIMP, or REFUSED response codes.
   */
  if (!(channel->flags & ARES_FLAG_NOCHECKRESP))
    {
      if (rcode == SERVFAIL || rcode == NOTIMP || rcode == REFUSED)
        {
          skip_server(channel, query, whichserver);
          if (query->server == whichserver)
            next_server(channel, query, now);
          return;
        }
    }

  end_query(channel, query, ARES_SUCCESS, abuf, alen);
}
686
687 /* Close all the connections that are no longer usable. */
/* Close all the connections that are no longer usable: every server
 * flagged is_broken gets its sockets reset via handle_error(), which
 * also re-routes the queries that were in flight to it. */
static void process_broken_connections(ares_channel channel,
                                       struct timeval *now)
{
  int idx;

  for (idx = 0; idx < channel->nservers; idx++)
    {
      if (channel->servers[idx].is_broken)
        handle_error(channel, idx, now);
    }
}
701
702 /* Swap the contents of two lists */
/* Swap the contents of two lists.
 *
 * The list heads are sentinel nodes of circular doubly-linked lists,
 * so a plain struct swap is not enough: the first and last real nodes
 * of each chain must be re-pointed at their new sentinel, and an empty
 * list must be re-initialized to point at itself. */
static void swap_lists(struct list_node* head_a,
                       struct list_node* head_b)
{
  int is_a_empty = ares__is_list_empty(head_a);
  int is_b_empty = ares__is_list_empty(head_b);
  struct list_node old_a = *head_a;
  struct list_node old_b = *head_b;

  if (is_a_empty) {
    /* a was empty, so b becomes empty. */
    ares__init_list_head(head_b);
  } else {
    /* Move a's chain under head_b; the boundary nodes must now link
     * back to head_b rather than the old head_a. */
    *head_b = old_a;
    old_a.next->prev = head_b;
    old_a.prev->next = head_b;
  }
  if (is_b_empty) {
    /* b was empty, so a becomes empty. */
    ares__init_list_head(head_a);
  } else {
    *head_a = old_b;
    old_b.next->prev = head_a;
    old_b.prev->next = head_a;
  }
}
726
/* React to a fatal error on the given server: close its sockets and
 * retry every query that was in flight to it on another server. */
static void handle_error(ares_channel channel, int whichserver,
                         struct timeval *now)
{
  struct server_state *server;
  struct query *query;
  struct list_node list_head;
  struct list_node* list_node;

  server = &channel->servers[whichserver];

  /* Reset communications with this server. */
  ares__close_sockets(channel, server);

  /* Tell all queries talking to this server to move on and not try this
   * server again. We steal the current list of queries that were in-flight to
   * this server, since when we call next_server this can cause the queries to
   * be re-sent to this server, which will re-insert these queries in that
   * same server->queries_to_server list.
   */
  ares__init_list_head(&list_head);
  swap_lists(&list_head, &(server->queries_to_server));
  for (list_node = list_head.next; list_node != &list_head; )
    {
      query = list_node->data;
      list_node = list_node->next; /* in case the query gets deleted */
      assert(query->server == whichserver);
      skip_server(channel, query, whichserver);
      /* next_server either re-sends the query (removing it from our
       * stolen list as it re-registers itself) or fails it outright. */
      next_server(channel, query, now);
    }
  /* Each query should have removed itself from our temporary list as
   * it re-sent itself or finished up...
   */
  assert(ares__is_list_empty(&list_head));
}
761
/* Mark the given server as one this query should avoid from now on --
 * but only when other servers exist.  With a single configured server,
 * a retry has no choice but to reuse it: the failure may have been
 * transient (e.g. the server dropped our TCP connection just as we
 * were sending another request), so we leave it eligible. */
static void skip_server(ares_channel channel, struct query *query,
                        int whichserver)
{
  if (channel->nservers <= 1)
    return;

  query->server_info[whichserver].skip_server = 1;
}
778
/* Advance the query to the next eligible server, or fail it when all
 * attempts are exhausted. */
static void next_server(ares_channel channel, struct query *query,
                        struct timeval *now)
{
  /* We need to try each server channel->tries times. We have channel->nservers
   * servers to try. In total, we need to do channel->nservers * channel->tries
   * attempts. Use query->try to remember how many times we already attempted
   * this query. Use modular arithmetic to find the next server to try. */
  while (++(query->try_count) < (channel->nservers * channel->tries))
    {
      struct server_state *server;

      /* Move on to the next server. */
      query->server = (query->server + 1) % channel->nservers;
      server = &channel->servers[query->server];

      /* We don't want to use this server if (1) we decided this connection is
       * broken, and thus about to be closed, (2) we've decided to skip this
       * server because of earlier errors we encountered, or (3) we already
       * sent this query over this exact connection.
       */
      if (!server->is_broken &&
           !query->server_info[query->server].skip_server &&
           !(query->using_tcp &&
             (query->server_info[query->server].tcp_connection_generation ==
              server->tcp_connection_generation)))
        {
           ares__send_query(channel, query, now);
           return;
        }

      /* You might think that with TCP we only need one try. However, even
       * when using TCP, servers can time-out our connection just as we're
       * sending a request, or close our connection because they die, or never
       * send us a reply because they get wedged or tickle a bug that drops
       * our request.
       */
    }

  /* If we are here, all attempts to perform query failed.  Report the
   * last recorded error status to the caller. */
  end_query(channel, query, query->error_status, NULL, 0);
}
820
/* Transmit (or queue) the query on its currently-selected server, over
 * TCP or UDP according to query->using_tcp, opening the socket on
 * demand.  On success the query's timeout is re-armed and it is
 * re-registered in the timeout and per-server bookkeeping lists; on
 * failure the query is advanced to the next server or ended. */
void ares__send_query(ares_channel channel, struct query *query,
                      struct timeval *now)
{
  struct send_request *sendreq;
  struct server_state *server;
  int timeplus;

  server = &channel->servers[query->server];
  if (query->using_tcp)
    {
      /* Make sure the TCP socket for this server is set up and queue
       * a send request.
       */
      if (server->tcp_socket == ARES_SOCKET_BAD)
        {
          if (open_tcp_socket(channel, server) == -1)
            {
              skip_server(channel, query, query->server);
              next_server(channel, query, now);
              return;
            }
        }
      sendreq = ares_malloc(sizeof(struct send_request));
      if (!sendreq)
        {
        end_query(channel, query, ARES_ENOMEM, NULL, 0);
          return;
        }
      memset(sendreq, 0, sizeof(struct send_request));
      /* To make the common case fast, we avoid copies by using the query's
       * tcpbuf for as long as the query is alive. In the rare case where the
       * query ends while it's queued for transmission, then we give the
       * sendreq its own copy of the request packet and put it in
       * sendreq->data_storage.
       */
      sendreq->data_storage = NULL;
      sendreq->data = query->tcpbuf;
      sendreq->len = query->tcplen;
      sendreq->owner_query = query;
      sendreq->next = NULL;
      if (server->qtail)
        server->qtail->next = sendreq;
      else
        {
          /* Queue was empty: ask the app for write readiness events. */
          SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 1);
          server->qhead = sendreq;
        }
      server->qtail = sendreq;
      /* Remember which connection generation carried this query, so a
       * retry won't be sent over the very same (possibly dead) one. */
      query->server_info[query->server].tcp_connection_generation =
        server->tcp_connection_generation;
    }
  else
    {
      if (server->udp_socket == ARES_SOCKET_BAD)
        {
          if (open_udp_socket(channel, server) == -1)
            {
              skip_server(channel, query, query->server);
              next_server(channel, query, now);
              return;
            }
        }
      if (socket_write(channel, server->udp_socket, query->qbuf, query->qlen) == -1)
        {
          /* FIXME: Handle EAGAIN here since it likely can happen. */
          skip_server(channel, query, query->server);
          next_server(channel, query, now);
          return;
        }
    }

  /* For each trip through the entire server list, double the channel's
   * assigned timeout, avoiding overflow. If channel->timeout is negative,
   * leave it as-is, even though that should be impossible here.
   */
  timeplus = channel->timeout;
  {
    /* How many times do we want to double it? Presume sane values here. */
    const int shift = query->try_count / channel->nservers;

    /* Is there enough room to shift timeplus left that many times?
     *
     * To find out, confirm that all of the bits we'll shift away are zero.
     * Stop considering a shift if we get to the point where we could shift
     * a 1 into the sign bit (i.e. when shift is within two of the bit
     * count).
     *
     * This has the side benefit of leaving negative numbers unchanged.
     */
    if(shift <= (int)(sizeof(int) * CHAR_BIT - 1)
       && (timeplus >> (sizeof(int) * CHAR_BIT - 1 - shift)) == 0)
      {
        timeplus <<= shift;
      }
  }

  query->timeout = *now;
  timeadd(&query->timeout, timeplus);
  /* Keep track of queries bucketed by timeout, so we can process
   * timeout events quickly.
   */
  ares__remove_from_list(&(query->queries_by_timeout));
  ares__insert_in_list(
    &(query->queries_by_timeout),
    &(channel->queries_by_timeout[query->timeout.tv_sec %
                                  ARES_TIMEOUT_TABLE_SIZE]));

  /* Keep track of queries bucketed by server, so we can process server
   * errors quickly.
   */
  ares__remove_from_list(&(query->queries_to_server));
  ares__insert_in_list(&(query->queries_to_server),
                       &(server->queries_to_server));
}
935
936 /*
937 * setsocknonblock sets the given socket to either blocking or non-blocking
938 * mode based on the 'nonblock' boolean argument. This function is highly
939 * portable.
940 */
/*
 * setsocknonblock sets the given socket to either blocking or non-blocking
 * mode based on the 'nonblock' boolean argument. This function is highly
 * portable.
 *
 * Returns 0 on success; on failure it propagates the platform call's
 * error return (-1 for the fcntl/ioctl paths).  Exactly one of the
 * preprocessor branches below is compiled in, chosen by configure-time
 * feature detection.
 */
static int setsocknonblock(ares_socket_t sockfd,    /* operate on this */
                           int nonblock   /* TRUE or FALSE */)
{
#if defined(USE_BLOCKING_SOCKETS)

  return 0; /* returns success */

#elif defined(HAVE_FCNTL_O_NONBLOCK)

  /* most recent unix versions */
  int flags;
  /* read-modify-write so the other status flags are preserved */
  flags = fcntl(sockfd, F_GETFL, 0);
  if (FALSE != nonblock)
    return fcntl(sockfd, F_SETFL, flags | O_NONBLOCK);
  else
    return fcntl(sockfd, F_SETFL, flags & (~O_NONBLOCK));  /* LCOV_EXCL_LINE */

#elif defined(HAVE_IOCTL_FIONBIO)

  /* older unix versions */
  int flags = nonblock ? 1 : 0;
  return ioctl(sockfd, FIONBIO, &flags);

#elif defined(HAVE_IOCTLSOCKET_FIONBIO)

#ifdef WATT32
  char flags = nonblock ? 1 : 0;
#else
  /* Windows */
  unsigned long flags = nonblock ? 1UL : 0UL;
#endif
  return ioctlsocket(sockfd, FIONBIO, &flags);

#elif defined(HAVE_IOCTLSOCKET_CAMEL_FIONBIO)

  /* Amiga */
  long flags = nonblock ? 1L : 0L;
  return IoctlSocket(sockfd, FIONBIO, flags);

#elif defined(HAVE_SETSOCKOPT_SO_NONBLOCK)

  /* BeOS */
  long b = nonblock ? 1L : 0L;
  return setsockopt(sockfd, SOL_SOCKET, SO_NONBLOCK, &b, sizeof(b));

#else
#  error "no non-blocking method was found/used/set"
#endif
}
990
#if defined(IPV6_V6ONLY) && defined(WIN32)
/* Toggles support for IPv4-mapped IPv6 addresses on the socket.
 * Linux kernel, NetBSD, FreeBSD and Darwin: default is off;
 * Windows Vista and later: default is on;
 * DragonFly BSD: acts like off, and dummy setting;
 * OpenBSD and earlier Windows: unsupported.
 * Linux: controlled by /proc/sys/net/ipv6/bindv6only.
 */
static void set_ipv6_v6only(ares_socket_t sockfd, int on)
{
  /* Failure is deliberately ignored: the platform default stays in effect. */
  (void)setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, (void *)&on, sizeof(on));
}
#else
/* No-op everywhere the override is unsupported or unnecessary. */
#define set_ipv6_v6only(s,v)
#endif
1006
/* Apply the channel's socket options to a newly created socket 's' of
 * address family 'family': non-blocking mode, close-on-exec, send/receive
 * buffer sizes, device binding, and an optional local source address.
 * Returns 0 on success, -1 on a fatal configuration error.  User-managed
 * sockets (channel->sock_funcs set) are left entirely untouched.
 */
static int configure_socket(ares_socket_t s, int family, ares_channel channel)
{
  union {
    struct sockaddr sa;
    struct sockaddr_in sa4;
    struct sockaddr_in6 sa6;
  } local;

  /* do not set options for user-managed sockets */
  if (channel->sock_funcs)
    return 0;

  /* Result intentionally ignored; a blocking socket still works, just
   * less efficiently. */
  (void)setsocknonblock(s, TRUE);

#if defined(FD_CLOEXEC) && !defined(MSDOS)
  /* Configure the socket fd as close-on-exec. */
  if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1)
    return -1;  /* LCOV_EXCL_LINE */
#endif

  /* Set the socket's send and receive buffer sizes. */
  if ((channel->socket_send_buffer_size > 0) &&
     setsockopt(s, SOL_SOCKET, SO_SNDBUF,
                (void *)&channel->socket_send_buffer_size,
                sizeof(channel->socket_send_buffer_size)) == -1)
    return -1;

  if ((channel->socket_receive_buffer_size > 0) &&
     setsockopt(s, SOL_SOCKET, SO_RCVBUF,
                (void *)&channel->socket_receive_buffer_size,
                sizeof(channel->socket_receive_buffer_size)) == -1)
    return -1;

#ifdef SO_BINDTODEVICE
  if (channel->local_dev_name[0]) {
    if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
                   channel->local_dev_name, sizeof(channel->local_dev_name))) {
      /* Only root can do this, and usually not fatal if it doesn't work, so */
      /* just continue on. */
    }
  }
#endif

  if (family == AF_INET) {
    /* Bind to the configured local IPv4 source address, if any. */
    if (channel->local_ip4) {
      memset(&local.sa4, 0, sizeof(local.sa4));
      local.sa4.sin_family = AF_INET;
      local.sa4.sin_addr.s_addr = htonl(channel->local_ip4);
      if (bind(s, &local.sa, sizeof(local.sa4)) < 0)
        return -1;
    }
  }
  else if (family == AF_INET6) {
    /* Bind to the configured local IPv6 source address, if any. */
    if (memcmp(channel->local_ip6, &ares_in6addr_any,
               sizeof(channel->local_ip6)) != 0) {
      memset(&local.sa6, 0, sizeof(local.sa6));
      local.sa6.sin6_family = AF_INET6;
      memcpy(&local.sa6.sin6_addr, channel->local_ip6,
             sizeof(channel->local_ip6));
      if (bind(s, &local.sa, sizeof(local.sa6)) < 0)
        return -1;
    }
    /* Allow IPv4-mapped addresses on platforms that default to v6-only. */
    set_ipv6_v6only(s, 0);
  }

  return 0;
}
1074
/* Create, configure and begin a non-blocking connect of the TCP socket for
 * 'server'.  A per-server tcp_port overrides the channel-wide tcp_port.  On
 * success, resets server->tcp_buffer_pos, stores the socket in
 * server->tcp_socket, bumps the connection generation, and returns 0.
 * Returns -1 on failure, or the negative error from a user socket callback.
 */
static int open_tcp_socket(ares_channel channel, struct server_state *server)
{
  ares_socket_t s;
#ifdef TCP_NODELAY
  /* Declared only when used, to avoid an unused-variable warning on
   * platforms that lack TCP_NODELAY. */
  int opt;
#endif
  ares_socklen_t salen;
  union {
    struct sockaddr_in sa4;
    struct sockaddr_in6 sa6;
  } saddr;
  struct sockaddr *sa;

  /* Build the server's TCP address. */
  switch (server->addr.family)
    {
      case AF_INET:
        sa = (void *)&saddr.sa4;
        salen = sizeof(saddr.sa4);
        memset(sa, 0, salen);
        saddr.sa4.sin_family = AF_INET;
        if (server->addr.tcp_port) {
          saddr.sa4.sin_port = aresx_sitous(server->addr.tcp_port);
        } else {
          saddr.sa4.sin_port = aresx_sitous(channel->tcp_port);
        }
        memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4,
               sizeof(server->addr.addrV4));
        break;
      case AF_INET6:
        sa = (void *)&saddr.sa6;
        salen = sizeof(saddr.sa6);
        memset(sa, 0, salen);
        saddr.sa6.sin6_family = AF_INET6;
        if (server->addr.tcp_port) {
          saddr.sa6.sin6_port = aresx_sitous(server->addr.tcp_port);
        } else {
          saddr.sa6.sin6_port = aresx_sitous(channel->tcp_port);
        }
        memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6,
               sizeof(server->addr.addrV6));
        break;
      default:
        return -1;  /* LCOV_EXCL_LINE */
    }

  /* Acquire a socket. */
  s = ares__open_socket(channel, server->addr.family, SOCK_STREAM, 0);
  if (s == ARES_SOCKET_BAD)
    return -1;

  /* Configure it. */
  if (configure_socket(s, server->addr.family, channel) < 0)
    {
       ares__close_socket(channel, s);
       return -1;
    }

#ifdef TCP_NODELAY
  /*
   * Disable the Nagle algorithm (only relevant for TCP sockets, and thus not
   * in configure_socket). In general, in DNS lookups we're pretty much
   * interested in firing off a single request and then waiting for a reply,
   * so batching isn't very interesting.
   */
  opt = 1;
  if (channel->sock_funcs == 0
     &&
     setsockopt(s, IPPROTO_TCP, TCP_NODELAY,
                (void *)&opt, sizeof(opt)) == -1)
    {
       ares__close_socket(channel, s);
       return -1;
    }
#endif

  if (channel->sock_config_cb)
    {
      int err = channel->sock_config_cb(s, SOCK_STREAM,
                                        channel->sock_config_cb_data);
      if (err < 0)
        {
          ares__close_socket(channel, s);
          return err;
        }
    }

  /* Connect to the server.  A pending non-blocking connect is not an
   * error; completion is detected later via writability. */
  if (ares__connect_socket(channel, s, sa, salen) == -1)
    {
      int err = SOCKERRNO;

      if (err != EINPROGRESS && err != EWOULDBLOCK)
        {
          ares__close_socket(channel, s);
          return -1;
        }
    }

  if (channel->sock_create_cb)
    {
      int err = channel->sock_create_cb(s, SOCK_STREAM,
                                        channel->sock_create_cb_data);
      if (err < 0)
        {
          ares__close_socket(channel, s);
          return err;
        }
    }

  SOCK_STATE_CALLBACK(channel, s, 1, 0);
  server->tcp_buffer_pos = 0;
  server->tcp_socket = s;
  server->tcp_connection_generation = ++channel->tcp_connection_generation;
  return 0;
}
1188
/* Create, configure and (non-blocking) connect the UDP socket for 'server'.
 * On success the socket is stored in server->udp_socket and 0 is returned;
 * -1 or a negative user-callback error is returned on failure.
 */
static int open_udp_socket(ares_channel channel, struct server_state *server)
{
  ares_socket_t s;
  ares_socklen_t salen;
  union {
    struct sockaddr_in sa4;
    struct sockaddr_in6 sa6;
  } saddr;
  struct sockaddr *sa;
  int family = server->addr.family;

  /* Build the destination address; a per-server udp_port overrides the
   * channel-wide default. */
  if (family == AF_INET)
    {
      int port = server->addr.udp_port ? server->addr.udp_port
                                       : channel->udp_port;
      sa = (void *)&saddr.sa4;
      salen = sizeof(saddr.sa4);
      memset(sa, 0, salen);
      saddr.sa4.sin_family = AF_INET;
      saddr.sa4.sin_port = aresx_sitous(port);
      memcpy(&saddr.sa4.sin_addr, &server->addr.addrV4,
             sizeof(server->addr.addrV4));
    }
  else if (family == AF_INET6)
    {
      int port = server->addr.udp_port ? server->addr.udp_port
                                       : channel->udp_port;
      sa = (void *)&saddr.sa6;
      salen = sizeof(saddr.sa6);
      memset(sa, 0, salen);
      saddr.sa6.sin6_family = AF_INET6;
      saddr.sa6.sin6_port = aresx_sitous(port);
      memcpy(&saddr.sa6.sin6_addr, &server->addr.addrV6,
             sizeof(server->addr.addrV6));
    }
  else
    return -1;  /* LCOV_EXCL_LINE */

  /* Acquire a socket. */
  s = ares__open_socket(channel, family, SOCK_DGRAM, 0);
  if (s == ARES_SOCKET_BAD)
    return -1;

  /* Set the socket non-blocking and apply channel socket options. */
  if (configure_socket(s, family, channel) < 0)
    {
      ares__close_socket(channel, s);
      return -1;
    }

  if (channel->sock_config_cb)
    {
      int err = channel->sock_config_cb(s, SOCK_DGRAM,
                                        channel->sock_config_cb_data);
      if (err < 0)
        {
          ares__close_socket(channel, s);
          return err;
        }
    }

  /* Connect to the server; an in-progress non-blocking connect is fine. */
  if (ares__connect_socket(channel, s, sa, salen) == -1)
    {
      int err = SOCKERRNO;

      if (err != EINPROGRESS && err != EWOULDBLOCK)
        {
          ares__close_socket(channel, s);
          return -1;
        }
    }

  if (channel->sock_create_cb)
    {
      int err = channel->sock_create_cb(s, SOCK_DGRAM,
                                        channel->sock_create_cb_data);
      if (err < 0)
        {
          ares__close_socket(channel, s);
          return err;
        }
    }

  SOCK_STATE_CALLBACK(channel, s, 1, 0);

  server->udp_socket = s;
  return 0;
}
1282
/* Compare the question sections of a DNS query (qbuf/qlen) and a response
 * (abuf/alen).  Returns 1 when the question counts match and every question
 * in the query also appears in the response (names compared
 * case-insensitively, type and class compared exactly); returns 0 on any
 * mismatch or parse failure.  Used to verify a reply answers our query.
 */
static int same_questions(const unsigned char *qbuf, int qlen,
                          const unsigned char *abuf, int alen)
{
  struct {
    const unsigned char *p;  /* read cursor into the buffer */
    int qdcount;             /* question count from the header */
    char *name;              /* current expanded question name (heap) */
    long namelen;            /* encoded length of that name in the buffer */
    int type;
    int dnsclass;
  } q, a;
  int i, j;

  /* Both buffers must at least hold a DNS header. */
  if (qlen < HFIXEDSZ || alen < HFIXEDSZ)
    return 0;

  /* Extract qdcount from the request and reply buffers and compare them. */
  q.qdcount = DNS_HEADER_QDCOUNT(qbuf);
  a.qdcount = DNS_HEADER_QDCOUNT(abuf);
  if (q.qdcount != a.qdcount)
    return 0;

  /* For each question in qbuf, find it in abuf. */
  q.p = qbuf + HFIXEDSZ;
  for (i = 0; i < q.qdcount; i++)
    {
      /* Decode the question in the query. */
      if (ares_expand_name(q.p, qbuf, qlen, &q.name, &q.namelen)
          != ARES_SUCCESS)
        return 0;
      q.p += q.namelen;
      /* Make sure the fixed type/class fields fit inside the buffer. */
      if (q.p + QFIXEDSZ > qbuf + qlen)
        {
          ares_free(q.name);
          return 0;
        }
      q.type = DNS_QUESTION_TYPE(q.p);
      q.dnsclass = DNS_QUESTION_CLASS(q.p);
      q.p += QFIXEDSZ;

      /* Search for this question in the answer. */
      a.p = abuf + HFIXEDSZ;
      for (j = 0; j < a.qdcount; j++)
        {
          /* Decode the question in the answer. */
          if (ares_expand_name(a.p, abuf, alen, &a.name, &a.namelen)
              != ARES_SUCCESS)
            {
              ares_free(q.name);
              return 0;
            }
          a.p += a.namelen;
          if (a.p + QFIXEDSZ > abuf + alen)
            {
              ares_free(q.name);
              ares_free(a.name);
              return 0;
            }
          a.type = DNS_QUESTION_TYPE(a.p);
          a.dnsclass = DNS_QUESTION_CLASS(a.p);
          a.p += QFIXEDSZ;

          /* Compare the decoded questions. */
          if (strcasecmp(q.name, a.name) == 0 && q.type == a.type
              && q.dnsclass == a.dnsclass)
            {
              ares_free(a.name);
              break;
            }
          ares_free(a.name);
        }

      ares_free(q.name);
      /* Inner loop ran to completion: question i was not found. */
      if (j == a.qdcount)
        return 0;
    }
  return 1;
}
1361
same_address(struct sockaddr * sa,struct ares_addr * aa)1362 static int same_address(struct sockaddr *sa, struct ares_addr *aa)
1363 {
1364 void *addr1;
1365 void *addr2;
1366
1367 if (sa->sa_family == aa->family)
1368 {
1369 switch (aa->family)
1370 {
1371 case AF_INET:
1372 addr1 = &aa->addrV4;
1373 addr2 = &(CARES_INADDR_CAST(struct sockaddr_in *, sa))->sin_addr;
1374 if (memcmp(addr1, addr2, sizeof(aa->addrV4)) == 0)
1375 return 1; /* match */
1376 break;
1377 case AF_INET6:
1378 addr1 = &aa->addrV6;
1379 addr2 = &(CARES_INADDR_CAST(struct sockaddr_in6 *, sa))->sin6_addr;
1380 if (memcmp(addr1, addr2, sizeof(aa->addrV6)) == 0)
1381 return 1; /* match */
1382 break;
1383 default:
1384 break; /* LCOV_EXCL_LINE */
1385 }
1386 }
1387 return 0; /* different */
1388 }
1389
/* search for an OPT RR in the response */
/* Scan the DNS response in abuf/alen for an EDNS OPT pseudo-RR (type 41)
 * in the additional-records section.  Returns 1 if one is present, 0 if
 * none is found, and -1 if the message is truncated or malformed.
 */
static int has_opt_rr(const unsigned char *abuf, int alen)
{
  unsigned int qdcount, ancount, nscount, arcount, i;
  const unsigned char *aptr;
  int status;

  if (alen < HFIXEDSZ)
    return -1;

  /* Parse the answer header. */
  qdcount = DNS_HEADER_QDCOUNT(abuf);
  ancount = DNS_HEADER_ANCOUNT(abuf);
  nscount = DNS_HEADER_NSCOUNT(abuf);
  arcount = DNS_HEADER_ARCOUNT(abuf);

  aptr = abuf + HFIXEDSZ;

  /* skip the questions */
  for (i = 0; i < qdcount; i++)
    {
      char* name;
      long len;
      status = ares_expand_name(aptr, abuf, alen, &name, &len);
      if (status != ARES_SUCCESS)
        return -1;
      /* Only the encoded name length is needed; discard the string. */
      ares_free_string(name);
      if (aptr + len + QFIXEDSZ > abuf + alen)
        return -1;
      aptr += len + QFIXEDSZ;
    }

  /* skip the ancount and nscount */
  for (i = 0; i < ancount + nscount; i++)
    {
      char* name;
      long len;
      int dlen;
      status = ares_expand_name(aptr, abuf, alen, &name, &len);
      if (status != ARES_SUCCESS)
        return -1;
      ares_free_string(name);
      /* Bounds-check the fixed RR header before reading its RDLENGTH. */
      if (aptr + len + RRFIXEDSZ > abuf + alen)
        return -1;
      aptr += len;
      dlen = DNS_RR_LEN(aptr);
      aptr += RRFIXEDSZ;
      /* Then validate and skip the RDATA itself. */
      if (aptr + dlen > abuf + alen)
        return -1;
      aptr += dlen;
    }

  /* search for rr type (41) - opt */
  for (i = 0; i < arcount; i++)
    {
      char* name;
      long len;
      int dlen;
      status = ares_expand_name(aptr, abuf, alen, &name, &len);
      if (status != ARES_SUCCESS)
        return -1;
      ares_free_string(name);
      if (aptr + len + RRFIXEDSZ > abuf + alen)
        return -1;
      aptr += len;

      /* The bounds check above guarantees the TYPE field is readable. */
      if (DNS_RR_TYPE(aptr) == T_OPT)
        return 1;

      dlen = DNS_RR_LEN(aptr);
      aptr += RRFIXEDSZ;
      if (aptr + dlen > abuf + alen)
        return -1;
      aptr += dlen;
    }

  return 0;
}
1468
/* Complete a query: detach it from any server send queues (copying or
 * invalidating queued TCP data that points into the query's buffer), invoke
 * the user's callback with 'status' and the response abuf/alen, free the
 * query, and - unless ARES_FLAG_STAYOPEN is set - close all sockets once no
 * queries remain.
 */
static void end_query (ares_channel channel, struct query *query, int status,
                       unsigned char *abuf, int alen)
{
  int i;

  /* First we check to see if this query ended while one of our send
   * queues still has pointers to it.
   */
  for (i = 0; i < channel->nservers; i++)
    {
      struct server_state *server = &channel->servers[i];
      struct send_request *sendreq;
      for (sendreq = server->qhead; sendreq; sendreq = sendreq->next)
        if (sendreq->owner_query == query)
          {
            sendreq->owner_query = NULL;
            assert(sendreq->data_storage == NULL);
            if (status == ARES_SUCCESS)
              {
                /* We got a reply for this query, but this queued sendreq
                 * points into this soon-to-be-gone query's tcpbuf. Probably
                 * this means we timed out and queued the query for
                 * retransmission, then received a response before actually
                 * retransmitting. This is perfectly fine, so we want to keep
                 * the connection running smoothly if we can. But in the worst
                 * case we may have sent only some prefix of the query, with
                 * some suffix of the query left to send. Also, the buffer may
                 * be queued on multiple queues. To prevent dangling pointers
                 * to the query's tcpbuf and handle these cases, we just give
                 * such sendreqs their own copy of the query packet.
                 */
               sendreq->data_storage = ares_malloc(sendreq->len);
               if (sendreq->data_storage != NULL)
                 {
                   memcpy(sendreq->data_storage, sendreq->data, sendreq->len);
                   sendreq->data = sendreq->data_storage;
                 }
              }
            /* Note: also entered when the copy above failed to allocate. */
            if ((status != ARES_SUCCESS) || (sendreq->data_storage == NULL))
              {
                /* We encountered an error (probably a timeout, suggesting the
                 * DNS server we're talking to is probably unreachable,
                 * wedged, or severely overloaded) or we couldn't copy the
                 * request, so mark the connection as broken. When we get to
                 * process_broken_connections() we'll close the connection and
                 * try to re-send requests to another server.
                 */
               server->is_broken = 1;
               /* Just to be paranoid, zero out this sendreq... */
               sendreq->data = NULL;
               sendreq->len = 0;
             }
          }
    }

  /* Invoke the callback */
  query->callback(query->arg, status, query->timeouts, abuf, alen);
  ares__free_query(query);

  /* Simple cleanup policy: if no queries are remaining, close all network
   * sockets unless STAYOPEN is set.
   */
  if (!(channel->flags & ARES_FLAG_STAYOPEN) &&
      ares__is_list_empty(&(channel->all_queries)))
    {
      for (i = 0; i < channel->nservers; i++)
        ares__close_sockets(channel, &channel->servers[i]);
    }
}
1538
ares__free_query(struct query * query)1539 void ares__free_query(struct query *query)
1540 {
1541 /* Remove the query from all the lists in which it is linked */
1542 ares__remove_from_list(&(query->queries_by_qid));
1543 ares__remove_from_list(&(query->queries_by_timeout));
1544 ares__remove_from_list(&(query->queries_to_server));
1545 ares__remove_from_list(&(query->all_queries));
1546 /* Zero out some important stuff, to help catch bugs */
1547 query->callback = NULL;
1548 query->arg = NULL;
1549 /* Deallocate the memory associated with the query */
1550 ares_free(query->tcpbuf);
1551 ares_free(query->server_info);
1552 ares_free(query);
1553 }
1554
ares__open_socket(ares_channel channel,int af,int type,int protocol)1555 ares_socket_t ares__open_socket(ares_channel channel,
1556 int af, int type, int protocol)
1557 {
1558 if (channel->sock_funcs)
1559 return channel->sock_funcs->asocket(af,
1560 type,
1561 protocol,
1562 channel->sock_func_cb_data);
1563 else
1564 return socket(af, type, protocol);
1565 }
1566
/* Connect a socket, delegating to the user-supplied socket functions when
 * present, otherwise using the system connect().
 */
int ares__connect_socket(ares_channel channel,
                         ares_socket_t sockfd,
                         const struct sockaddr *addr,
                         ares_socklen_t addrlen)
{
  if (!channel->sock_funcs)
    return connect(sockfd, addr, addrlen);

  return channel->sock_funcs->aconnect(sockfd, addr, addrlen,
                                       channel->sock_func_cb_data);
}
1580
/* Close a socket through the user-supplied socket functions when the
 * channel has any; otherwise use the platform close wrapper.
 */
void ares__close_socket(ares_channel channel, ares_socket_t s)
{
  if (!channel->sock_funcs)
    {
      sclose(s);
      return;
    }
  channel->sock_funcs->aclose(s, channel->sock_func_cb_data);
}
1588