1 /* $NetBSD: res_send.c,v 1.9 2006/01/24 17:41:25 christos Exp $ */
2
3 /*
4 * Copyright (c) 1985, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 /*
37 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
38 *
39 * Permission to use, copy, modify, and distribute this software for any
40 * purpose with or without fee is hereby granted, provided that the above
41 * copyright notice and this permission notice appear in all copies, and that
42 * the name of Digital Equipment Corporation not be used in advertising or
43 * publicity pertaining to distribution of the document or software without
44 * specific, written prior permission.
45 *
46 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
47 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
48 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
49 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
50 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
51 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
52 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53 * SOFTWARE.
54 */
55
56 /*
57 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
58 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
59 *
60 * Permission to use, copy, modify, and distribute this software for any
61 * purpose with or without fee is hereby granted, provided that the above
62 * copyright notice and this permission notice appear in all copies.
63 *
64 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
70 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71 */
72
73 /*
74 * Send query to name server and wait for reply.
75 */
76
77 #define LOG_TAG "resolv"
78
79 #include <chrono>
80
81 #include <sys/param.h>
82 #include <sys/socket.h>
83 #include <sys/time.h>
84 #include <sys/uio.h>
85
86 #include <arpa/inet.h>
87 #include <arpa/nameser.h>
88
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <netdb.h>
92 #include <poll.h>
93 #include <signal.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <time.h>
97 #include <unistd.h>
98 #include <span>
99
100 #include <android-base/logging.h>
101 #include <android-base/result.h>
102 #include <android/multinetwork.h> // ResNsendFlags
103
104 #include <netdutils/Slice.h>
105 #include <netdutils/Stopwatch.h>
106 #include "DnsTlsDispatcher.h"
107 #include "DnsTlsTransport.h"
108 #include "Experiments.h"
109 #include "PrivateDnsConfiguration.h"
110 #include "netd_resolv/resolv.h"
111 #include "private/android_filesystem_config.h"
112
113 #include "doh.h"
114 #include "res_comp.h"
115 #include "res_debug.h"
116 #include "resolv_cache.h"
117 #include "stats.h"
118 #include "stats.pb.h"
119 #include "util.h"
120
121 using namespace std::chrono_literals;
122 // TODO: use the namespace something like android::netd_resolv for libnetd_resolv
123 using android::base::ErrnoError;
124 using android::base::Result;
125 using android::base::unique_fd;
126 using android::net::CacheStatus;
127 using android::net::DnsQueryEvent;
128 using android::net::DnsTlsDispatcher;
129 using android::net::DnsTlsServer;
130 using android::net::DnsTlsTransport;
131 using android::net::Experiments;
132 using android::net::IpVersion;
133 using android::net::IV_IPV4;
134 using android::net::IV_IPV6;
135 using android::net::IV_UNKNOWN;
136 using android::net::LinuxErrno;
137 using android::net::NetworkDnsEventReported;
138 using android::net::NS_T_AAAA;
139 using android::net::NS_T_INVALID;
140 using android::net::NsRcode;
141 using android::net::NsType;
142 using android::net::PrivateDnsConfiguration;
143 using android::net::PrivateDnsMode;
144 using android::net::PrivateDnsModes;
145 using android::net::PrivateDnsStatus;
146 using android::net::PROTO_DOH;
147 using android::net::PROTO_MDNS;
148 using android::net::PROTO_TCP;
149 using android::net::PROTO_UDP;
150 using android::netdutils::IPSockAddr;
151 using android::netdutils::Slice;
152 using android::netdutils::Stopwatch;
153 using std::span;
154
155 const std::vector<IPSockAddr> mdns_addrs = {IPSockAddr::toIPSockAddr("ff02::fb", 5353),
156 IPSockAddr::toIPSockAddr("224.0.0.251", 5353)};
157
158 static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd_out, int* terrno);
159 static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
160 int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, time_t* at,
161 int* rcode, int* delay);
162 static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
163 int* terrno, size_t ns, time_t* at, int* rcode, int* delay);
164 static int send_mdns(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* terrno,
165 int* rcode);
166 static void dump_error(const char*, const struct sockaddr*);
167
168 static int sock_eq(struct sockaddr*, struct sockaddr*);
169 static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
170 const struct timespec timeout);
171 static int retrying_poll(const int sock, short events, const struct timespec* finish);
172 static int res_private_dns_send(ResState*, const Slice query, const Slice answer, int* rcode,
173 bool* fallback);
174 static int res_tls_send(const std::list<DnsTlsServer>& tlsServers, ResState*, const Slice query,
175 const Slice answer, int* rcode, PrivateDnsMode mode);
176 static ssize_t res_doh_send(ResState*, const Slice query, const Slice answer, int* rcode);
177
getQueryType(span<const uint8_t> msg)178 NsType getQueryType(span<const uint8_t> msg) {
179 ns_msg handle;
180 ns_rr rr;
181 if (ns_initparse(msg.data(), msg.size(), &handle) < 0 ||
182 ns_parserr(&handle, ns_s_qd, 0, &rr) < 0) {
183 return NS_T_INVALID;
184 }
185 return static_cast<NsType>(ns_rr_type(rr));
186 }
187
ipFamilyToIPVersion(const int ipFamily)188 IpVersion ipFamilyToIPVersion(const int ipFamily) {
189 switch (ipFamily) {
190 case AF_INET:
191 return IV_IPV4;
192 case AF_INET6:
193 return IV_IPV6;
194 default:
195 return IV_UNKNOWN;
196 }
197 }
198
199 // BEGIN: Code copied from ISC eventlib
200 // TODO: move away from this code
201 #define BILLION 1000000000
202
// Builds a timespec out of a seconds / nanoseconds pair. Both values are stored as given;
// no normalization is performed.
static struct timespec evConsTime(time_t sec, long nsec) {
    struct timespec result;
    result.tv_nsec = nsec;
    result.tv_sec = sec;
    return result;
}
210
// Returns addend1 + addend2. A nanosecond sum of one second or more is carried into tv_sec
// (a single carry, which is sufficient when both inputs hold less than a second of nanoseconds).
static struct timespec evAddTime(struct timespec addend1, struct timespec addend2) {
    constexpr long kNanosPerSecond = 1000000000L;

    struct timespec sum = addend1;
    sum.tv_sec += addend2.tv_sec;
    sum.tv_nsec += addend2.tv_nsec;
    if (sum.tv_nsec >= kNanosPerSecond) {
        sum.tv_nsec -= kNanosPerSecond;
        ++sum.tv_sec;
    }
    return sum;
}
222
// Returns minuend - subtrahend. When the minuend has fewer nanoseconds than the subtrahend,
// one second is borrowed so that tv_nsec of the result stays non-negative.
static struct timespec evSubTime(struct timespec minuend, struct timespec subtrahend) {
    constexpr long kNanosPerSecond = 1000000000L;
    const bool borrow = minuend.tv_nsec < subtrahend.tv_nsec;

    struct timespec diff;
    diff.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
    if (borrow) {
        diff.tv_nsec = kNanosPerSecond - subtrahend.tv_nsec + minuend.tv_nsec;
        --diff.tv_sec;
    } else {
        diff.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
    }
    return diff;
}
235
// Three-way comparison of two timespec values.
// Returns -1 when |a| is earlier than |b|, 1 when it is later and 0 when both are equal.
// Seconds are compared first; nanoseconds only break a tie.
static int evCmpTime(struct timespec a, struct timespec b) {
// Sign of x as -1 / 0 / 1. The macro must not end in a semicolon: that would make it unusable
// inside a larger expression and would turn every "return SGN(x);" into two statements.
#define SGN(x) ((x) < 0 ? (-1) : (x) > 0 ? (1) : (0))
    const time_t secDiff = a.tv_sec - b.tv_sec;
    if (secDiff != 0) return SGN(secDiff);

    const long nsecDiff = a.tv_nsec - b.tv_nsec;
    return SGN(nsecDiff);
}
246
// Returns the current CLOCK_REALTIME reading. The result of clock_gettime() is not checked.
static struct timespec evNowTime(void) {
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    return now;
}
252
253 // END: Code copied from ISC eventlib
254
255 /* BIONIC-BEGIN: implement source port randomization */
random_bind(int s,int family)256 static int random_bind(int s, int family) {
257 sockaddr_union u;
258 int j;
259 socklen_t slen;
260
261 /* clear all, this also sets the IP4/6 address to 'any' */
262 memset(&u, 0, sizeof u);
263
264 switch (family) {
265 case AF_INET:
266 u.sin.sin_family = family;
267 slen = sizeof u.sin;
268 break;
269 case AF_INET6:
270 u.sin6.sin6_family = family;
271 slen = sizeof u.sin6;
272 break;
273 default:
274 errno = EPROTO;
275 return -1;
276 }
277
278 /* first try to bind to a random source port a few times */
279 for (j = 0; j < 10; j++) {
280 /* find a random port between 1025 .. 65534 */
281 int port = 1025 + (arc4random_uniform(65535 - 1025));
282 // RFC 6762 section 5.1: Don't use 5353 source port on one-shot Multicast DNS queries. DNS
283 // resolver does not fully compliant mDNS.
284 if (port == 5353) continue;
285
286 if (family == AF_INET)
287 u.sin.sin_port = htons(port);
288 else
289 u.sin6.sin6_port = htons(port);
290
291 if (!bind(s, &u.sa, slen)) return 0;
292 }
293
294 // nothing after 10 attempts, our network table is probably busy
295 // let the system decide which port is best
296 if (family == AF_INET)
297 u.sin.sin_port = 0;
298 else
299 u.sin6.sin6_port = 0;
300
301 return bind(s, &u.sa, slen);
302 }
303 /* BIONIC-END */
304
// Leaves only one server enabled in |usable_servers|: the |selectedServer|-th (1-based) entry
// among those currently marked usable. Every other one of the first |nscount| entries is set to
// false.
static void res_set_usable_server(int selectedServer, int nscount, bool usable_servers[]) {
    int usableSeen = 0;  // number of usable servers encountered so far, this one included
    for (int i = 0; i < nscount; ++i) {
        if (usable_servers[i]) usableSeen++;
        usable_servers[i] = usable_servers[i] && (usableSeen == selectedServer);
    }
}
313
314 // Looks up the nameserver address in res.nsaddrs[], returns the ns number if found, otherwise -1.
res_ourserver_p(ResState * statp,const sockaddr * sa)315 static int res_ourserver_p(ResState* statp, const sockaddr* sa) {
316 const sockaddr_in *inp, *srv;
317 const sockaddr_in6 *in6p, *srv6;
318 int ns = 0;
319 switch (sa->sa_family) {
320 case AF_INET:
321 inp = (const struct sockaddr_in*) (const void*) sa;
322
323 for (const IPSockAddr& ipsa : statp->nsaddrs) {
324 sockaddr_storage ss = ipsa;
325 srv = reinterpret_cast<sockaddr_in*>(&ss);
326 if (srv->sin_family == inp->sin_family && srv->sin_port == inp->sin_port &&
327 (srv->sin_addr.s_addr == INADDR_ANY ||
328 srv->sin_addr.s_addr == inp->sin_addr.s_addr))
329 return ns;
330 ++ns;
331 }
332 break;
333 case AF_INET6:
334 in6p = (const struct sockaddr_in6*) (const void*) sa;
335 for (const IPSockAddr& ipsa : statp->nsaddrs) {
336 sockaddr_storage ss = ipsa;
337 srv6 = reinterpret_cast<sockaddr_in6*>(&ss);
338 if (srv6->sin6_family == in6p->sin6_family && srv6->sin6_port == in6p->sin6_port &&
339 #ifdef HAVE_SIN6_SCOPE_ID
340 (srv6->sin6_scope_id == 0 || srv6->sin6_scope_id == in6p->sin6_scope_id) &&
341 #endif
342 (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
343 IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
344 return ns;
345 ++ns;
346 }
347 break;
348 default:
349 break;
350 }
351 return -1;
352 }
353
354 /* int
355 * res_nameinquery(name, type, cl, msg, eom)
356 * look for (name, type, cl) in the query section of packet (msg, eom)
357 * requires:
358 * msg + HFIXEDSZ <= eom
359 * returns:
360 * -1 : format error
361 * 0 : not found
362 * >0 : found
363 * author:
364 * paul vixie, 29may94
365 */
res_nameinquery(const char * name,int type,int cl,const uint8_t * msg,const uint8_t * eom)366 int res_nameinquery(const char* name, int type, int cl, const uint8_t* msg, const uint8_t* eom) {
367 const uint8_t* cp = msg + HFIXEDSZ;
368 int qdcount = ntohs(((const HEADER*)(const void*)msg)->qdcount);
369
370 while (qdcount-- > 0) {
371 char tname[MAXDNAME + 1];
372 int n = dn_expand(msg, eom, cp, tname, sizeof tname);
373 if (n < 0) return (-1);
374 cp += n;
375 if (cp + 2 * INT16SZ > eom) return (-1);
376 int ttype = ntohs(*reinterpret_cast<const uint16_t*>(cp));
377 cp += INT16SZ;
378 int tclass = ntohs(*reinterpret_cast<const uint16_t*>(cp));
379 cp += INT16SZ;
380 if (ttype == type && tclass == cl && ns_samename(tname, name) == 1) return (1);
381 }
382 return (0);
383 }
384
385 /* int
386 * res_queriesmatch(buf1, eom1, buf2, eom2)
387 * is there a 1:1 mapping of (name,type,class)
388 * in (buf1,eom1) and (buf2,eom2)?
389 * returns:
390 * -1 : format error
391 * 0 : not a 1:1 mapping
392 * >0 : is a 1:1 mapping
393 * author:
394 * paul vixie, 29may94
395 */
res_queriesmatch(const uint8_t * buf1,const uint8_t * eom1,const uint8_t * buf2,const uint8_t * eom2)396 int res_queriesmatch(const uint8_t* buf1, const uint8_t* eom1, const uint8_t* buf2,
397 const uint8_t* eom2) {
398 const uint8_t* cp = buf1 + HFIXEDSZ;
399 int qdcount = ntohs(((const HEADER*) (const void*) buf1)->qdcount);
400
401 if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2) return (-1);
402
403 /*
404 * Only header section present in replies to
405 * dynamic update packets.
406 */
407 if ((((const HEADER*) (const void*) buf1)->opcode == ns_o_update) &&
408 (((const HEADER*) (const void*) buf2)->opcode == ns_o_update))
409 return (1);
410
411 if (qdcount != ntohs(((const HEADER*) (const void*) buf2)->qdcount)) return (0);
412 while (qdcount-- > 0) {
413 char tname[MAXDNAME + 1];
414 int n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
415 if (n < 0) return (-1);
416 cp += n;
417 if (cp + 2 * INT16SZ > eom1) return (-1);
418 int ttype = ntohs(*reinterpret_cast<const uint16_t*>(cp));
419 cp += INT16SZ;
420 int tclass = ntohs(*reinterpret_cast<const uint16_t*>(cp));
421 cp += INT16SZ;
422 if (!res_nameinquery(tname, ttype, tclass, buf2, eom2)) return (0);
423 }
424 return (1);
425 }
426
addDnsQueryEvent(NetworkDnsEventReported * event)427 static DnsQueryEvent* addDnsQueryEvent(NetworkDnsEventReported* event) {
428 return event->mutable_dns_query_events()->add_dns_query_event();
429 }
430
// Tells whether |terrno| suggests that the query was blocked by a network restricted mode.
static bool isNetworkRestricted(int terrno) {
    // When the system is in some network restricted mode, sending a packet can be blocked and
    // fail with EPERM. There is no reason to keep retrying in that case.
    // TODO: Check the system status to know if network restricted mode is
    // enabled.
    const bool blocked = (terrno == EPERM);
    return blocked;
}
439
res_nsend(ResState * statp,span<const uint8_t> msg,span<uint8_t> ans,int * rcode,uint32_t flags,std::chrono::milliseconds sleepTimeMs)440 int res_nsend(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* rcode,
441 uint32_t flags, std::chrono::milliseconds sleepTimeMs) {
442 LOG(DEBUG) << __func__;
443
444 // Should not happen
445 if (ans.size() < HFIXEDSZ) {
446 // TODO: Remove errno once callers stop using it
447 errno = EINVAL;
448 return -EINVAL;
449 }
450 res_pquery(msg);
451
452 int anslen = 0;
453 Stopwatch cacheStopwatch;
454 ResolvCacheStatus cache_status = resolv_cache_lookup(statp->netid, msg, ans, &anslen, flags);
455 const int32_t cacheLatencyUs = saturate_cast<int32_t>(cacheStopwatch.timeTakenUs());
456 if (cache_status == RESOLV_CACHE_FOUND) {
457 HEADER* hp = (HEADER*)(void*)ans.data();
458 *rcode = hp->rcode;
459 DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
460 dnsQueryEvent->set_latency_micros(cacheLatencyUs);
461 dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
462 dnsQueryEvent->set_type(getQueryType(msg));
463 return anslen;
464 } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
465 // had a cache miss for a known network, so populate the thread private
466 // data so the normal resolve path can do its thing
467 resolv_populate_res_for_net(statp);
468 }
469
470 // MDNS
471 if (isMdnsResolution(statp->flags)) {
472 // Use an impossible error code as default value.
473 int terrno = ETIME;
474 int resplen = 0;
475 *rcode = RCODE_INTERNAL_ERROR;
476 Stopwatch queryStopwatch;
477 resplen = send_mdns(statp, msg, ans, &terrno, rcode);
478 const IPSockAddr& receivedMdnsAddr =
479 (getQueryType(msg) == NS_T_AAAA) ? mdns_addrs[0] : mdns_addrs[1];
480 DnsQueryEvent* mDnsQueryEvent = addDnsQueryEvent(statp->event);
481 mDnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
482 mDnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
483 mDnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedMdnsAddr.family()));
484 mDnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
485 mDnsQueryEvent->set_protocol(PROTO_MDNS);
486 mDnsQueryEvent->set_type(getQueryType(msg));
487 mDnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));
488 resolv_stats_add(statp->netid, receivedMdnsAddr, mDnsQueryEvent);
489
490 if (resplen > 0) {
491 LOG(DEBUG) << __func__ << ": got answer from mDNS:";
492 res_pquery(ans.first(resplen));
493
494 if (cache_status == RESOLV_CACHE_NOTFOUND) {
495 resolv_cache_add(statp->netid, msg, {ans.data(), resplen});
496 }
497 return resplen;
498 }
499 }
500
501 if (statp->nameserverCount() == 0) {
502 // We have no nameservers configured and it's not a MDNS resolution, so there's no
503 // point trying. Tell the cache the query failed, or any retries and anyone else
504 // asking the same question will block for PENDING_REQUEST_TIMEOUT seconds instead
505 // of failing fast.
506 _resolv_cache_query_failed(statp->netid, msg, flags);
507
508 // TODO: Remove errno once callers stop using it
509 errno = ESRCH;
510 return -ESRCH;
511 }
512
513 // Private DNS
514 if (!(statp->netcontext_flags & NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS)) {
515 bool fallback = false;
516 int resplen =
517 res_private_dns_send(statp, Slice(const_cast<uint8_t*>(msg.data()), msg.size()),
518 Slice(ans.data(), ans.size()), rcode, &fallback);
519 if (resplen > 0) {
520 LOG(DEBUG) << __func__ << ": got answer from Private DNS";
521 res_pquery(ans.first(resplen));
522 if (cache_status == RESOLV_CACHE_NOTFOUND) {
523 resolv_cache_add(statp->netid, msg, ans.first(resplen));
524 }
525 return resplen;
526 }
527 if (!fallback) {
528 _resolv_cache_query_failed(statp->netid, msg, flags);
529 return -ETIMEDOUT;
530 }
531 }
532
533 // If parallel_lookup is enabled, it might be required to wait some time to avoid
534 // gateways from dropping packets if queries are sent too close together.
535 if (sleepTimeMs != 0ms) {
536 std::this_thread::sleep_for(sleepTimeMs);
537 }
538
539 res_stats stats[MAXNS]{};
540 res_params params;
541 int revision_id = resolv_cache_get_resolver_stats(statp->netid, ¶ms, stats, statp->nsaddrs);
542 if (revision_id < 0) {
543 // TODO: Remove errno once callers stop using it
544 errno = ESRCH;
545 return -ESRCH;
546 }
547
548 bool usable_servers[MAXNS];
549 int usableServersCount = android_net_res_stats_get_usable_servers(
550 ¶ms, stats, statp->nameserverCount(), usable_servers);
551
552 if (statp->sort_nameservers) {
553 // It's unnecessary to mark a DNS server as unusable since broken servers will be less
554 // likely to be chosen.
555 for (int i = 0; i < statp->nameserverCount(); i++) {
556 usable_servers[i] = true;
557 }
558 }
559
560 // TODO: Let it always choose the first nameserver when sort_nameservers is enabled.
561 if ((flags & ANDROID_RESOLV_NO_RETRY) && usableServersCount > 1) {
562 auto hp = reinterpret_cast<const HEADER*>(msg.data());
563
564 // Select a random server based on the query id
565 int selectedServer = (hp->id % usableServersCount) + 1;
566 res_set_usable_server(selectedServer, statp->nameserverCount(), usable_servers);
567 }
568
569 // Send request, RETRY times, or until successful.
570 int retryTimes = (flags & ANDROID_RESOLV_NO_RETRY) ? 1 : params.retry_count;
571 int useTcp = msg.size() > PACKETSZ;
572 int gotsomewhere = 0;
573
574 // Use an impossible error code as default value
575 int terrno = ETIME;
576 // plaintext DNS
577 for (int attempt = 0; attempt < retryTimes; ++attempt) {
578 for (size_t ns = 0; ns < statp->nsaddrs.size(); ++ns) {
579 if (!usable_servers[ns]) continue;
580
581 *rcode = RCODE_INTERNAL_ERROR;
582
583 // Get server addr
584 const IPSockAddr& serverSockAddr = statp->nsaddrs[ns];
585 LOG(DEBUG) << __func__ << ": Querying server (# " << ns + 1
586 << ") address = " << serverSockAddr.toString();
587
588 ::android::net::Protocol query_proto = useTcp ? PROTO_TCP : PROTO_UDP;
589 time_t query_time = 0;
590 int delay = 0;
591 bool fallbackTCP = false;
592 const bool shouldRecordStats = (attempt == 0);
593 int resplen;
594 Stopwatch queryStopwatch;
595 int retry_count_for_event = 0;
596 size_t actualNs = ns;
597 // Use an impossible error code as default value
598 terrno = ETIME;
599 if (useTcp) {
600 // TCP; at most one attempt per server.
601 attempt = retryTimes;
602 resplen =
603 send_vc(statp, ¶ms, msg, ans, &terrno, ns, &query_time, rcode, &delay);
604
605 if (msg.size() <= PACKETSZ && resplen <= 0 &&
606 statp->tc_mode == aidl::android::net::IDnsResolver::TC_MODE_UDP_TCP) {
607 // reset to UDP for next query on next DNS server if resolver is currently doing
608 // TCP fallback retry and current server does not support TCP connectin
609 useTcp = false;
610 }
611 LOG(INFO) << __func__ << ": used send_vc " << resplen << " terrno: " << terrno;
612 } else {
613 // UDP
614 resplen = send_dg(statp, ¶ms, msg, ans, &terrno, &actualNs, &useTcp,
615 &gotsomewhere, &query_time, rcode, &delay);
616 fallbackTCP = useTcp ? true : false;
617 retry_count_for_event = attempt;
618 LOG(INFO) << __func__ << ": used send_dg " << resplen << " terrno: " << terrno;
619 }
620
621 const IPSockAddr& receivedServerAddr = statp->nsaddrs[actualNs];
622 DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
623 dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
624 // When |retryTimes| > 1, we cannot actually know the correct latency value if we
625 // received the answer from the previous server. So temporarily set the latency as -1 if
626 // that condition happened.
627 // TODO: make the latency value accurate.
628 dnsQueryEvent->set_latency_micros(
629 (actualNs == ns) ? saturate_cast<int32_t>(queryStopwatch.timeTakenUs()) : -1);
630 dnsQueryEvent->set_dns_server_index(actualNs);
631 dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedServerAddr.family()));
632 dnsQueryEvent->set_retry_times(retry_count_for_event);
633 dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
634 dnsQueryEvent->set_protocol(query_proto);
635 dnsQueryEvent->set_type(getQueryType(msg));
636 dnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));
637
638 // Only record stats the first time we try a query. This ensures that
639 // queries that deterministically fail (e.g., a name that always returns
640 // SERVFAIL or times out) do not unduly affect the stats.
641 if (shouldRecordStats) {
642 // (b/151166599): This is a workaround to prevent that DnsResolver calculates the
643 // reliability of DNS servers from being broken when network restricted mode is
644 // enabled.
645 // TODO: Introduce the new server selection instead of skipping stats recording.
646 if (!isNetworkRestricted(terrno)) {
647 res_sample sample;
648 res_stats_set_sample(&sample, query_time, *rcode, delay);
649 // KeepListening UDP mechanism is incompatible with usable_servers of legacy
650 // stats, so keep the old logic for now.
651 // TODO: Replace usable_servers of legacy stats with new one.
652 resolv_cache_add_resolver_stats_sample(
653 statp->netid, revision_id, serverSockAddr, sample, params.max_samples);
654 resolv_stats_add(statp->netid, receivedServerAddr, dnsQueryEvent);
655 }
656 }
657
658 if (resplen == 0) continue;
659 if (fallbackTCP) {
660 ns--;
661 continue;
662 }
663 if (resplen < 0) {
664 _resolv_cache_query_failed(statp->netid, msg, flags);
665 statp->closeSockets();
666 return -terrno;
667 }
668
669 LOG(DEBUG) << __func__ << ": got answer:";
670 res_pquery(ans.first(resplen));
671
672 if (cache_status == RESOLV_CACHE_NOTFOUND) {
673 resolv_cache_add(statp->netid, msg, {ans.data(), resplen});
674 }
675 statp->closeSockets();
676 return (resplen);
677 } // for each ns
678 } // for each retry
679 statp->closeSockets();
680 terrno = useTcp ? terrno : gotsomewhere ? ETIMEDOUT : ECONNREFUSED;
681 // TODO: Remove errno once callers stop using it
682 errno = useTcp ? terrno
683 : gotsomewhere ? ETIMEDOUT /* no answer obtained */
684 : ECONNREFUSED /* no nameservers found */;
685
686 _resolv_cache_query_failed(statp->netid, msg, flags);
687 return -terrno;
688 }
689
get_timeout(ResState * statp,const res_params * params,const int addrIndex)690 static struct timespec get_timeout(ResState* statp, const res_params* params, const int addrIndex) {
691 int msec;
692 msec = params->base_timeout_msec << addrIndex;
693 // Legacy algorithm which scales the timeout by nameserver number.
694 // For instance, with 4 nameservers: 5s, 2.5s, 5s, 10s
695 // This has no effect with 1 or 2 nameservers
696 if (addrIndex > 0) {
697 msec /= statp->nameserverCount();
698 }
699 // For safety, don't allow OEMs and experiments to configure a timeout shorter than 1s.
700 if (msec < 1000) {
701 msec = 1000; // Use at least 1000ms
702 }
703 LOG(INFO) << __func__ << ": using timeout of " << msec << " msec";
704
705 struct timespec result;
706 result.tv_sec = msec / 1000;
707 result.tv_nsec = (msec % 1000) * 1000000;
708 return result;
709 }
710
send_vc(ResState * statp,res_params * params,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,size_t ns,time_t * at,int * rcode,int * delay)711 static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
712 int* terrno, size_t ns, time_t* at, int* rcode, int* delay) {
713 *at = time(NULL);
714 *delay = 0;
715 const HEADER* hp = (const HEADER*)(const void*)msg.data();
716 HEADER* anhp = (HEADER*)(void*)ans.data();
717 struct sockaddr* nsap;
718 int nsaplen;
719 int truncating, connreset, n;
720 uint8_t* cp;
721
722 LOG(INFO) << __func__ << ": using send_vc";
723
724 // It should never happen, but just in case.
725 if (ns >= statp->nsaddrs.size()) {
726 LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
727 *terrno = EINVAL;
728 return -1;
729 }
730
731 sockaddr_storage ss = statp->nsaddrs[ns];
732 nsap = reinterpret_cast<sockaddr*>(&ss);
733 nsaplen = sockaddrSize(nsap);
734
735 connreset = 0;
736 same_ns:
737 truncating = 0;
738
739 struct timespec start_time = evNowTime();
740
741 /* Are we still talking to whom we want to talk to? */
742 if (statp->tcp_nssock >= 0 && (statp->flags & RES_F_VC) != 0) {
743 struct sockaddr_storage peer;
744 socklen_t size = sizeof peer;
745 unsigned old_mark;
746 socklen_t mark_size = sizeof(old_mark);
747 if (getpeername(statp->tcp_nssock, (struct sockaddr*)(void*)&peer, &size) < 0 ||
748 !sock_eq((struct sockaddr*)(void*)&peer, nsap) ||
749 getsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &old_mark, &mark_size) < 0 ||
750 old_mark != statp->mark) {
751 statp->closeSockets();
752 }
753 }
754
755 if (statp->tcp_nssock < 0 || (statp->flags & RES_F_VC) == 0) {
756 if (statp->tcp_nssock >= 0) statp->closeSockets();
757
758 statp->tcp_nssock.reset(socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0));
759 if (statp->tcp_nssock < 0) {
760 *terrno = errno;
761 PLOG(DEBUG) << __func__ << ": socket(vc): ";
762 switch (errno) {
763 case EPROTONOSUPPORT:
764 case EPFNOSUPPORT:
765 case EAFNOSUPPORT:
766 return 0;
767 default:
768 return -1;
769 }
770 }
771 const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
772 resolv_tag_socket(statp->tcp_nssock, uid, statp->pid);
773 if (statp->mark != MARK_UNSET) {
774 if (setsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &statp->mark,
775 sizeof(statp->mark)) < 0) {
776 *terrno = errno;
777 PLOG(DEBUG) << __func__ << ": setsockopt: ";
778 return -1;
779 }
780 }
781 errno = 0;
782 if (random_bind(statp->tcp_nssock, nsap->sa_family) < 0) {
783 *terrno = errno;
784 dump_error("bind/vc", nsap);
785 statp->closeSockets();
786 return (0);
787 }
788 if (connect_with_timeout(statp->tcp_nssock, nsap, (socklen_t)nsaplen,
789 get_timeout(statp, params, ns)) < 0) {
790 *terrno = errno;
791 dump_error("connect/vc", nsap);
792 statp->closeSockets();
793 /*
794 * The way connect_with_timeout() is implemented prevents us from reliably
795 * determining whether this was really a timeout or e.g. ECONNREFUSED. Since
796 * currently both cases are handled in the same way, there is no need to
797 * change this (yet). If we ever need to reliably distinguish between these
798 * cases, both connect_with_timeout() and retrying_poll() need to be
799 * modified, though.
800 */
801 *rcode = RCODE_TIMEOUT;
802 return (0);
803 }
804 statp->flags |= RES_F_VC;
805 }
806
807 /*
808 * Send length & message
809 */
810 uint16_t len = htons(static_cast<uint16_t>(msg.size()));
811 const iovec iov[] = {
812 {.iov_base = &len, .iov_len = INT16SZ},
813 {.iov_base = const_cast<uint8_t*>(msg.data()),
814 .iov_len = static_cast<size_t>(msg.size())},
815 };
816 if (writev(statp->tcp_nssock, iov, 2) != (INT16SZ + msg.size())) {
817 *terrno = errno;
818 PLOG(DEBUG) << __func__ << ": write failed: ";
819 statp->closeSockets();
820 return (0);
821 }
822 /*
823 * Receive length & response
824 */
825 read_len:
826 cp = ans.data();
827 len = INT16SZ;
828 while ((n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
829 cp += n;
830 if ((len -= n) == 0) break;
831 }
832 if (n <= 0) {
833 *terrno = errno;
834 PLOG(DEBUG) << __func__ << ": read failed: ";
835 statp->closeSockets();
836 /*
837 * A long running process might get its TCP
838 * connection reset if the remote server was
839 * restarted. Requery the server instead of
840 * trying a new one. When there is only one
841 * server, this means that a query might work
842 * instead of failing. We only allow one reset
843 * per query to prevent looping.
844 */
845 if (*terrno == ECONNRESET && !connreset) {
846 connreset = 1;
847 goto same_ns;
848 }
849 return (0);
850 }
851 uint16_t resplen = ntohs(*reinterpret_cast<const uint16_t*>(ans.data()));
852 if (resplen > ans.size()) {
853 LOG(DEBUG) << __func__ << ": response truncated";
854 truncating = 1;
855 len = ans.size();
856 } else
857 len = resplen;
858 if (len < HFIXEDSZ) {
859 /*
860 * Undersized message.
861 */
862 LOG(DEBUG) << __func__ << ": undersized: " << len;
863 *terrno = EMSGSIZE;
864 statp->closeSockets();
865 return (0);
866 }
867 cp = ans.data();
868 while (len != 0 && (n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
869 cp += n;
870 len -= n;
871 }
872 if (n <= 0) {
873 *terrno = errno;
874 PLOG(DEBUG) << __func__ << ": read(vc): ";
875 statp->closeSockets();
876 return (0);
877 }
878
879 if (truncating) {
880 /*
881 * Flush rest of answer so connection stays in synch.
882 */
883 anhp->tc = 1;
884 len = resplen - ans.size();
885 while (len != 0) {
886 char junk[PACKETSZ];
887
888 n = read(statp->tcp_nssock, junk, (len > sizeof junk) ? sizeof junk : len);
889 if (n > 0)
890 len -= n;
891 else
892 break;
893 }
894 LOG(WARNING) << __func__ << ": resplen " << resplen << " exceeds buf size " << ans.size();
895 // return size should never exceed container size
896 resplen = ans.size();
897 }
898 /*
899 * If the calling application has bailed out of
900 * a previous call and failed to arrange to have
901 * the circuit closed or the server has got
902 * itself confused, then drop the packet and
903 * wait for the correct one.
904 */
905 if (hp->id != anhp->id) {
906 LOG(DEBUG) << __func__ << ": ld answer (unexpected):";
907 res_pquery({ans.data(), resplen});
908 goto read_len;
909 }
910
911 /*
912 * All is well, or the error is fatal. Signal that the
913 * next nameserver ought not be tried.
914 */
915 if (resplen > 0) {
916 struct timespec done = evNowTime();
917 *delay = res_stats_calculate_rtt(&done, &start_time);
918 *rcode = anhp->rcode;
919 }
920 *terrno = 0;
921 return (resplen);
922 }
923
924 /* return -1 on error (errno set), 0 on success */
connect_with_timeout(int sock,const sockaddr * nsap,socklen_t salen,const timespec timeout)925 static int connect_with_timeout(int sock, const sockaddr* nsap, socklen_t salen,
926 const timespec timeout) {
927 int res, origflags;
928
929 origflags = fcntl(sock, F_GETFL, 0);
930 fcntl(sock, F_SETFL, origflags | O_NONBLOCK);
931
932 res = connect(sock, nsap, salen);
933 if (res < 0 && errno != EINPROGRESS) {
934 res = -1;
935 goto done;
936 }
937 if (res != 0) {
938 timespec now = evNowTime();
939 timespec finish = evAddTime(now, timeout);
940 LOG(INFO) << __func__ << ": " << sock << " send_vc";
941 res = retrying_poll(sock, POLLIN | POLLOUT, &finish);
942 if (res <= 0) {
943 res = -1;
944 }
945 }
946 done:
947 fcntl(sock, F_SETFL, origflags);
948 LOG(INFO) << __func__ << ": " << sock << " connect_with_const timeout returning " << res;
949 return res;
950 }
951
retrying_poll(const int sock,const short events,const struct timespec * finish)952 static int retrying_poll(const int sock, const short events, const struct timespec* finish) {
953 struct timespec now, timeout;
954
955 retry:
956 LOG(INFO) << __func__ << ": " << sock << " retrying_poll";
957
958 now = evNowTime();
959 if (evCmpTime(*finish, now) > 0)
960 timeout = evSubTime(*finish, now);
961 else
962 timeout = evConsTime(0L, 0L);
963 struct pollfd fds = {.fd = sock, .events = events};
964 int n = ppoll(&fds, 1, &timeout, /*__mask=*/NULL);
965 if (n == 0) {
966 LOG(INFO) << __func__ << ": " << sock << " retrying_poll timeout";
967 errno = ETIMEDOUT;
968 return 0;
969 }
970 if (n < 0) {
971 if (errno == EINTR) goto retry;
972 PLOG(INFO) << __func__ << ": " << sock << " retrying_poll failed";
973 return n;
974 }
975 if (fds.revents & (POLLIN | POLLOUT | POLLERR)) {
976 int error;
977 socklen_t len = sizeof(error);
978 if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0 || error) {
979 errno = error;
980 PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
981 return -1;
982 }
983 }
984 LOG(INFO) << __func__ << ": " << sock << " retrying_poll returning " << n;
985 return n;
986 }
987
extractUdpFdset(ResState * statp,const short events=POLLIN)988 static std::vector<pollfd> extractUdpFdset(ResState* statp, const short events = POLLIN) {
989 std::vector<pollfd> fdset(statp->nsaddrs.size());
990 for (size_t i = 0; i < statp->nsaddrs.size(); ++i) {
991 fdset[i] = {.fd = statp->udpsocks[i], .events = events};
992 }
993 return fdset;
994 }
995
udpRetryingPoll(ResState * statp,const timespec * finish)996 static Result<std::vector<int>> udpRetryingPoll(ResState* statp, const timespec* finish) {
997 for (;;) {
998 LOG(DEBUG) << __func__ << ": poll";
999 timespec start_time = evNowTime();
1000 timespec timeout = (evCmpTime(*finish, start_time) > 0) ? evSubTime(*finish, start_time)
1001 : evConsTime(0L, 0L);
1002 std::vector<pollfd> fdset = extractUdpFdset(statp);
1003 const int n = ppoll(fdset.data(), fdset.size(), &timeout, /*__mask=*/nullptr);
1004 if (n <= 0) {
1005 if (errno == EINTR && n < 0) continue;
1006 if (n == 0) errno = ETIMEDOUT;
1007 PLOG(INFO) << __func__ << ": failed";
1008 return ErrnoError();
1009 }
1010 std::vector<int> fdsToRead;
1011 for (const auto& pollfd : fdset) {
1012 if (pollfd.revents & (POLLIN | POLLERR)) {
1013 fdsToRead.push_back(pollfd.fd);
1014 }
1015 }
1016 LOG(DEBUG) << __func__ << ": "
1017 << " returning fd size: " << fdsToRead.size();
1018 return fdsToRead;
1019 }
1020 }
1021
udpRetryingPollWrapper(ResState * statp,int addrInfo,const timespec * finish)1022 static Result<std::vector<int>> udpRetryingPollWrapper(ResState* statp, int addrInfo,
1023 const timespec* finish) {
1024 const bool keepListeningUdp =
1025 android::net::Experiments::getInstance()->getFlag("keep_listening_udp", 0);
1026 if (keepListeningUdp) return udpRetryingPoll(statp, finish);
1027
1028 if (int n = retrying_poll(statp->udpsocks[addrInfo], POLLIN, finish); n <= 0) {
1029 return ErrnoError();
1030 }
1031 return std::vector<int>{statp->udpsocks[addrInfo]};
1032 }
1033
ignoreInvalidAnswer(ResState * statp,const sockaddr_storage & from,span<const uint8_t> msg,span<uint8_t> ans,int * receivedFromNs)1034 bool ignoreInvalidAnswer(ResState* statp, const sockaddr_storage& from, span<const uint8_t> msg,
1035 span<uint8_t> ans, int* receivedFromNs) {
1036 const HEADER* hp = (const HEADER*)(const void*)msg.data();
1037 HEADER* anhp = (HEADER*)(void*)ans.data();
1038 if (hp->id != anhp->id) {
1039 // response from old query, ignore it.
1040 LOG(DEBUG) << __func__ << ": old answer:";
1041 return true;
1042 }
1043 if (*receivedFromNs = res_ourserver_p(statp, (sockaddr*)(void*)&from); *receivedFromNs < 0) {
1044 // response from wrong server? ignore it.
1045 LOG(DEBUG) << __func__ << ": not our server:";
1046 return true;
1047 }
1048 if (!res_queriesmatch(msg.data(), msg.data() + msg.size(), ans.data(),
1049 ans.data() + ans.size())) {
1050 // response contains wrong query? ignore it.
1051 LOG(DEBUG) << __func__ << ": wrong query name:";
1052 return true;
1053 }
1054 return false;
1055 }
1056
1057 // return 1 - setup udp socket success.
1058 // return 0 - bind error, protocol error.
1059 // return -1 - create socket fail, except |EPROTONOSUPPORT| EPFNOSUPPORT |EAFNOSUPPORT|.
1060 // set socket option fail.
setupUdpSocket(ResState * statp,const sockaddr * sockap,unique_fd * fd_out,int * terrno)1061 static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd_out, int* terrno) {
1062 fd_out->reset(socket(sockap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0));
1063
1064 if (*fd_out < 0) {
1065 *terrno = errno;
1066 PLOG(ERROR) << __func__ << ": socket: ";
1067 switch (errno) {
1068 case EPROTONOSUPPORT:
1069 case EPFNOSUPPORT:
1070 case EAFNOSUPPORT:
1071 return 0;
1072 default:
1073 return -1;
1074 }
1075 }
1076 const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
1077 resolv_tag_socket(*fd_out, uid, statp->pid);
1078 if (statp->mark != MARK_UNSET) {
1079 if (setsockopt(*fd_out, SOL_SOCKET, SO_MARK, &(statp->mark), sizeof(statp->mark)) < 0) {
1080 *terrno = errno;
1081 return -1;
1082 }
1083 }
1084
1085 if (random_bind(*fd_out, sockap->sa_family) < 0) {
1086 *terrno = errno;
1087 dump_error("bind", sockap);
1088 return 0;
1089 }
1090 return 1;
1091 }
1092
send_dg(ResState * statp,res_params * params,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,size_t * ns,int * v_circuit,int * gotsomewhere,time_t * at,int * rcode,int * delay)1093 static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
1094 int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, time_t* at,
1095 int* rcode, int* delay) {
1096 // It should never happen, but just in case.
1097 if (*ns >= statp->nsaddrs.size()) {
1098 LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
1099 *terrno = EINVAL;
1100 return -1;
1101 }
1102
1103 *at = time(nullptr);
1104 *delay = 0;
1105 const sockaddr_storage ss = statp->nsaddrs[*ns];
1106 const sockaddr* nsap = reinterpret_cast<const sockaddr*>(&ss);
1107
1108 if (statp->udpsocks[*ns] == -1) {
1109 int result = setupUdpSocket(statp, nsap, &statp->udpsocks[*ns], terrno);
1110 if (result <= 0) return result;
1111
1112 // Use a "connected" datagram socket to receive an ECONNREFUSED error
1113 // on the next socket operation when the server responds with an
1114 // ICMP port-unreachable error. This way we can detect the absence of
1115 // a nameserver without timing out.
1116 if (connect(statp->udpsocks[*ns], nsap, sockaddrSize(nsap)) < 0) {
1117 *terrno = errno;
1118 dump_error("connect(dg)", nsap);
1119 statp->closeSockets();
1120 return 0;
1121 }
1122 LOG(DEBUG) << __func__ << ": new DG socket";
1123 }
1124 if (send(statp->udpsocks[*ns], msg.data(), msg.size(), 0) != msg.size()) {
1125 *terrno = errno;
1126 PLOG(DEBUG) << __func__ << ": send: ";
1127 statp->closeSockets();
1128 return 0;
1129 }
1130
1131 timespec timeout = get_timeout(statp, params, *ns);
1132 timespec start_time = evNowTime();
1133 timespec finish = evAddTime(start_time, timeout);
1134 for (;;) {
1135 // Wait for reply.
1136 auto result = udpRetryingPollWrapper(statp, *ns, &finish);
1137
1138 if (!result.has_value()) {
1139 const bool isTimeout = (result.error().code() == ETIMEDOUT);
1140 *rcode = (isTimeout) ? RCODE_TIMEOUT : *rcode;
1141 *terrno = (isTimeout) ? ETIMEDOUT : errno;
1142 *gotsomewhere = (isTimeout) ? 1 : *gotsomewhere;
1143 // Leave the UDP sockets open on timeout so we can keep listening for
1144 // a late response from this server while retrying on the next server.
1145 if (!isTimeout) statp->closeSockets();
1146 LOG(DEBUG) << __func__ << ": " << (isTimeout ? "timeout" : "poll");
1147 return 0;
1148 }
1149 bool needRetry = false;
1150 for (int fd : result.value()) {
1151 needRetry = false;
1152 sockaddr_storage from;
1153 socklen_t fromlen = sizeof(from);
1154 int resplen =
1155 recvfrom(fd, ans.data(), ans.size(), 0, (sockaddr*)(void*)&from, &fromlen);
1156 if (resplen <= 0) {
1157 *terrno = errno;
1158 PLOG(DEBUG) << __func__ << ": recvfrom: ";
1159 continue;
1160 }
1161 *gotsomewhere = 1;
1162 if (resplen < HFIXEDSZ) {
1163 // Undersized message.
1164 LOG(DEBUG) << __func__ << ": undersized: " << resplen;
1165 *terrno = EMSGSIZE;
1166 continue;
1167 }
1168
1169 int receivedFromNs = *ns;
1170 if (needRetry = ignoreInvalidAnswer(statp, from, msg, ans, &receivedFromNs);
1171 needRetry) {
1172 res_pquery({ans.data(), (resplen > ans.size()) ? ans.size() : resplen});
1173 continue;
1174 }
1175
1176 HEADER* anhp = (HEADER*)(void*)ans.data();
1177 if (anhp->rcode == FORMERR && (statp->netcontext_flags & NET_CONTEXT_FLAG_USE_EDNS)) {
1178 // Do not retry if the server do not understand EDNS0.
1179 // The case has to be captured here, as FORMERR packet do not
1180 // carry query section, hence res_queriesmatch() returns 0.
1181 LOG(DEBUG) << __func__ << ": server rejected query with EDNS0:";
1182 res_pquery({ans.data(), (resplen > ans.size()) ? ans.size() : resplen});
1183 // record the error
1184 statp->flags |= RES_F_EDNS0ERR;
1185 *terrno = EREMOTEIO;
1186 continue;
1187 }
1188
1189 timespec done = evNowTime();
1190 *delay = res_stats_calculate_rtt(&done, &start_time);
1191 if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
1192 LOG(DEBUG) << __func__ << ": server rejected query:";
1193 res_pquery({ans.data(), (resplen > ans.size()) ? ans.size() : resplen});
1194 *rcode = anhp->rcode;
1195 continue;
1196 }
1197 if (anhp->tc) {
1198 // To get the rest of answer,
1199 // use TCP with same server.
1200 LOG(DEBUG) << __func__ << ": truncated answer";
1201 *terrno = E2BIG;
1202 *v_circuit = 1;
1203 return 1;
1204 }
1205 // All is well, or the error is fatal. Signal that the
1206 // next nameserver ought not be tried.
1207
1208 *rcode = anhp->rcode;
1209 *ns = receivedFromNs;
1210 *terrno = 0;
1211 return resplen;
1212 }
1213 if (!needRetry) return 0;
1214 }
1215 }
1216
1217 // return length - when receiving valid packets.
1218 // return 0 - when mdns packets transfer error.
send_mdns(ResState * statp,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,int * rcode)1219 static int send_mdns(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* terrno,
1220 int* rcode) {
1221 const sockaddr_storage ss = (getQueryType(msg) == NS_T_AAAA) ? mdns_addrs[0] : mdns_addrs[1];
1222 const sockaddr* mdnsap = reinterpret_cast<const sockaddr*>(&ss);
1223 unique_fd fd;
1224
1225 if (setupUdpSocket(statp, mdnsap, &fd, terrno) <= 0) return 0;
1226
1227 if (sendto(fd, msg.data(), msg.size(), 0, mdnsap, sockaddrSize(mdnsap)) != msg.size()) {
1228 *terrno = errno;
1229 return 0;
1230 }
1231 // RFC 6762: Typically, the timeout would also be shortened to two or three seconds.
1232 const struct timespec finish = evAddTime(evNowTime(), {2, 2000000});
1233
1234 // Wait for reply.
1235 if (retrying_poll(fd, POLLIN, &finish) <= 0) {
1236 *terrno = errno;
1237 if (*terrno == ETIMEDOUT) *rcode = RCODE_TIMEOUT;
1238 LOG(ERROR) << __func__ << ": " << ((*terrno == ETIMEDOUT) ? "timeout" : "poll failed");
1239 return 0;
1240 }
1241
1242 sockaddr_storage from;
1243 socklen_t fromlen = sizeof(from);
1244 int resplen = recvfrom(fd, ans.data(), ans.size(), 0, (sockaddr*)(void*)&from, &fromlen);
1245
1246 if (resplen <= 0) {
1247 *terrno = errno;
1248 return 0;
1249 }
1250
1251 if (resplen < HFIXEDSZ) {
1252 // Undersized message.
1253 LOG(ERROR) << __func__ << ": undersized: " << resplen;
1254 *terrno = EMSGSIZE;
1255 return 0;
1256 }
1257
1258 HEADER* anhp = (HEADER*)(void*)ans.data();
1259 if (anhp->tc) {
1260 LOG(DEBUG) << __func__ << ": truncated answer";
1261 *terrno = E2BIG;
1262 return 0;
1263 }
1264
1265 *rcode = anhp->rcode;
1266 *terrno = 0;
1267 return resplen;
1268 }
1269
dump_error(const char * str,const struct sockaddr * address)1270 static void dump_error(const char* str, const struct sockaddr* address) {
1271 char hbuf[NI_MAXHOST];
1272 char sbuf[NI_MAXSERV];
1273 constexpr int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
1274 const int err = errno;
1275
1276 if (!WOULD_LOG(DEBUG)) return;
1277
1278 if (getnameinfo(address, sockaddrSize(address), hbuf, sizeof(hbuf), sbuf, sizeof(sbuf),
1279 niflags)) {
1280 strncpy(hbuf, "?", sizeof(hbuf) - 1);
1281 hbuf[sizeof(hbuf) - 1] = '\0';
1282 strncpy(sbuf, "?", sizeof(sbuf) - 1);
1283 sbuf[sizeof(sbuf) - 1] = '\0';
1284 }
1285 errno = err;
1286 PLOG(DEBUG) << __func__ << ": " << str << " ([" << hbuf << "]." << sbuf << "): ";
1287 }
1288
// Compares two socket addresses. Returns 1 when both have the same family (AF_INET or
// AF_INET6), port and address (and scope id, when HAVE_SIN6_SCOPE_ID is defined);
// returns 0 otherwise, including for any other address family.
static int sock_eq(struct sockaddr* a, struct sockaddr* b) {
    if (a->sa_family != b->sa_family) return 0;

    if (a->sa_family == AF_INET) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in*>(b);
        if (lhs->sin_port != rhs->sin_port) return 0;
        return (lhs->sin_addr.s_addr == rhs->sin_addr.s_addr) ? 1 : 0;
    }

    if (a->sa_family == AF_INET6) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in6*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in6*>(b);
        if (lhs->sin6_port != rhs->sin6_port) return 0;
#ifdef HAVE_SIN6_SCOPE_ID
        if (lhs->sin6_scope_id != rhs->sin6_scope_id) return 0;
#endif
        return IN6_ARE_ADDR_EQUAL(&lhs->sin6_addr, &rhs->sin6_addr) ? 1 : 0;
    }

    // Unsupported address family.
    return 0;
}
1311
convertEnumType(PrivateDnsMode privateDnsmode)1312 PrivateDnsModes convertEnumType(PrivateDnsMode privateDnsmode) {
1313 switch (privateDnsmode) {
1314 case PrivateDnsMode::OFF:
1315 return PrivateDnsModes::PDM_OFF;
1316 case PrivateDnsMode::OPPORTUNISTIC:
1317 return PrivateDnsModes::PDM_OPPORTUNISTIC;
1318 case PrivateDnsMode::STRICT:
1319 return PrivateDnsModes::PDM_STRICT;
1320 default:
1321 return PrivateDnsModes::PDM_UNKNOWN;
1322 }
1323 }
1324
res_private_dns_send(ResState * statp,const Slice query,const Slice answer,int * rcode,bool * fallback)1325 static int res_private_dns_send(ResState* statp, const Slice query, const Slice answer, int* rcode,
1326 bool* fallback) {
1327 const unsigned netId = statp->netid;
1328
1329 auto& privateDnsConfiguration = PrivateDnsConfiguration::getInstance();
1330 PrivateDnsStatus privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1331 statp->event->set_private_dns_modes(convertEnumType(privateDnsStatus.mode));
1332
1333 const bool enableDoH = isDoHEnabled();
1334 ssize_t result = -1;
1335 switch (privateDnsStatus.mode) {
1336 case PrivateDnsMode::OFF: {
1337 *fallback = true;
1338 return -1;
1339 }
1340 case PrivateDnsMode::OPPORTUNISTIC: {
1341 *fallback = true;
1342 if (enableDoH && privateDnsStatus.hasValidatedDohServers()) {
1343 result = res_doh_send(statp, query, answer, rcode);
1344 if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1345 }
1346 return res_tls_send(privateDnsStatus.validatedServers(), statp, query, answer, rcode,
1347 privateDnsStatus.mode);
1348 }
1349 case PrivateDnsMode::STRICT: {
1350 *fallback = false;
1351 if (enableDoH && privateDnsStatus.hasValidatedDohServers()) {
1352 result = res_doh_send(statp, query, answer, rcode);
1353 if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1354 }
1355 if (privateDnsStatus.validatedServers().empty()) {
1356 // Sleep and iterate some small number of times checking for the
1357 // arrival of resolved and validated server IP addresses, instead
1358 // of returning an immediate error.
1359 // This is needed because as soon as a network becomes the default network, apps
1360 // will send DNS queries on that network. If no servers have yet validated, and we
1361 // do not block those queries, they would immediately fail, causing
1362 // application-visible errors. Note that this can happen even before the network
1363 // validates, since an unvalidated network can become the default network if no
1364 // validated networks are available.
1365 //
1366 // TODO: see if there is a better way to address this problem, such as buffering the
1367 // queries in a queue or only blocking queries for the first few seconds after a
1368 // default network change.
1369 for (int i = 0; i < 42; i++) {
1370 std::this_thread::sleep_for(std::chrono::milliseconds(100));
1371
1372 // Calling getStatus() to merely check if there's any validated server seems
1373 // wasteful. Consider adding a new method in PrivateDnsConfiguration for speed
1374 // ups.
1375 privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1376
1377 if (enableDoH && privateDnsStatus.hasValidatedDohServers()) {
1378 result = res_doh_send(statp, query, answer, rcode);
1379 if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1380 }
1381
1382 // Switch to use the DoT servers if they are validated.
1383 if (!privateDnsStatus.validatedServers().empty()) {
1384 break;
1385 }
1386 }
1387 }
1388 return res_tls_send(privateDnsStatus.validatedServers(), statp, query, answer, rcode,
1389 privateDnsStatus.mode);
1390 }
1391 }
1392 LOG(ERROR) << __func__ << ": unknown private DNS mode";
1393 return -1;
1394 }
1395
res_doh_send(ResState * statp,const Slice query,const Slice answer,int * rcode)1396 ssize_t res_doh_send(ResState* statp, const Slice query, const Slice answer, int* rcode) {
1397 auto& privateDnsConfiguration = PrivateDnsConfiguration::getInstance();
1398 const unsigned netId = statp->netid;
1399 LOG(INFO) << __func__ << ": performing query over Https";
1400 Stopwatch queryStopwatch;
1401 int queryTimeout = Experiments::getInstance()->getFlag(
1402 "doh_query_timeout_ms", PrivateDnsConfiguration::kDohQueryDefaultTimeoutMs);
1403 if (queryTimeout < 1000) {
1404 queryTimeout = 1000;
1405 }
1406 ssize_t result = privateDnsConfiguration.dohQuery(netId, query, answer, queryTimeout);
1407 LOG(INFO) << __func__ << ": Https query result: " << result << ", netid=" << netId;
1408
1409 if (result == DOH_RESULT_CAN_NOT_SEND) return DOH_RESULT_CAN_NOT_SEND;
1410
1411 DnsQueryEvent* dnsQueryEvent = statp->event->mutable_dns_query_events()->add_dns_query_event();
1412 dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
1413 // TODO: Make this information available.
1414 // dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(?));
1415 if (result > 0) {
1416 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1417 } else {
1418 *rcode = -result;
1419 }
1420 dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
1421 dnsQueryEvent->set_protocol(PROTO_DOH);
1422 span<const uint8_t> msg(query.base(), query.size());
1423 dnsQueryEvent->set_type(getQueryType(msg));
1424
1425 auto dohServerAddr = privateDnsConfiguration.getDohServer(netId);
1426 if (dohServerAddr.ok()) {
1427 resolv_stats_add(netId, dohServerAddr.value(), dnsQueryEvent);
1428 }
1429
1430 return result;
1431 }
1432
res_tls_send(const std::list<DnsTlsServer> & tlsServers,ResState * statp,const Slice query,const Slice answer,int * rcode,PrivateDnsMode mode)1433 int res_tls_send(const std::list<DnsTlsServer>& tlsServers, ResState* statp, const Slice query,
1434 const Slice answer, int* rcode, PrivateDnsMode mode) {
1435 if (tlsServers.empty()) return -1;
1436 LOG(INFO) << __func__ << ": performing query over TLS";
1437 const bool dotQuickFallback =
1438 (mode == PrivateDnsMode::STRICT)
1439 ? 0
1440 : Experiments::getInstance()->getFlag("dot_quick_fallback", 1);
1441 int resplen = 0;
1442 const auto response = DnsTlsDispatcher::getInstance().query(tlsServers, statp, query, answer,
1443 &resplen, dotQuickFallback);
1444
1445 LOG(INFO) << __func__ << ": TLS query result: " << static_cast<int>(response);
1446 if (mode == PrivateDnsMode::OPPORTUNISTIC) {
1447 // In opportunistic mode, handle falling back to cleartext in some
1448 // cases (DNS shouldn't fail if a validated opportunistic mode server
1449 // becomes unreachable for some reason).
1450 switch (response) {
1451 case DnsTlsTransport::Response::success:
1452 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1453 return resplen;
1454 // It's OPPORTUNISTIC mode,
1455 // hence it's not required to do anything because it'll fallback to UDP.
1456 case DnsTlsTransport::Response::network_error:
1457 [[fallthrough]];
1458 case DnsTlsTransport::Response::internal_error:
1459 [[fallthrough]];
1460 default:
1461 return -1;
1462 }
1463 } else {
1464 // Strict mode
1465 switch (response) {
1466 case DnsTlsTransport::Response::success:
1467 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1468 return resplen;
1469 case DnsTlsTransport::Response::network_error:
1470 // This case happens when the query stored in DnsTlsTransport is expired since
1471 // either 1) the query has been tried for 3 times but no response or 2) fail to
1472 // establish the connection with the server.
1473 *rcode = RCODE_TIMEOUT;
1474 [[fallthrough]];
1475 default:
1476 return -1;
1477 }
1478 }
1479 }
1480
resolv_res_nsend(const android_net_context * netContext,span<const uint8_t> msg,span<uint8_t> ans,int * rcode,uint32_t flags,NetworkDnsEventReported * event)1481 int resolv_res_nsend(const android_net_context* netContext, span<const uint8_t> msg,
1482 span<uint8_t> ans, int* rcode, uint32_t flags,
1483 NetworkDnsEventReported* event) {
1484 assert(event != nullptr);
1485 ResState res(netContext, event);
1486 resolv_populate_res_for_net(&res);
1487 *rcode = NOERROR;
1488 return res_nsend(&res, msg, ans, rcode, flags);
1489 }
1490