• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*	$NetBSD: res_send.c,v 1.9 2006/01/24 17:41:25 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1985, 1989, 1993
5  *    The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  * 	This product includes software developed by the University of
18  * 	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * Portions Copyright (c) 1993 by Digital Equipment Corporation.
38  *
39  * Permission to use, copy, modify, and distribute this software for any
40  * purpose with or without fee is hereby granted, provided that the above
41  * copyright notice and this permission notice appear in all copies, and that
42  * the name of Digital Equipment Corporation not be used in advertising or
43  * publicity pertaining to distribution of the document or software without
44  * specific, written prior permission.
45  *
46  * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
47  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
48  * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
49  * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
50  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
51  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
52  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53  * SOFTWARE.
54  */
55 
56 /*
57  * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
58  * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
59  *
60  * Permission to use, copy, modify, and distribute this software for any
61  * purpose with or without fee is hereby granted, provided that the above
62  * copyright notice and this permission notice appear in all copies.
63  *
64  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
65  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66  * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR
67  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
70  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71  */
72 
73 /*
74  * Send query to name server and wait for reply.
75  */
76 
77 #define LOG_TAG "resolv"
78 
79 #include <chrono>
80 
81 #include <sys/param.h>
82 #include <sys/socket.h>
83 #include <sys/time.h>
84 #include <sys/uio.h>
85 
86 #include <arpa/inet.h>
87 #include <arpa/nameser.h>
88 
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <netdb.h>
92 #include <poll.h>
93 #include <signal.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <time.h>
97 #include <unistd.h>
98 #include <span>
99 
100 #include <android-base/logging.h>
101 #include <android-base/result.h>
102 #include <android/multinetwork.h>  // ResNsendFlags
103 
104 #include <netdutils/Slice.h>
105 #include <netdutils/Stopwatch.h>
106 #include "DnsTlsDispatcher.h"
107 #include "DnsTlsTransport.h"
108 #include "Experiments.h"
109 #include "PrivateDnsConfiguration.h"
110 #include "netd_resolv/resolv.h"
111 #include "private/android_filesystem_config.h"
112 
113 #include "doh.h"
114 #include "res_comp.h"
115 #include "res_debug.h"
116 #include "resolv_cache.h"
117 #include "stats.h"
118 #include "stats.pb.h"
119 #include "util.h"
120 
121 using namespace std::chrono_literals;
122 // TODO: use the namespace something like android::netd_resolv for libnetd_resolv
123 using android::base::ErrnoError;
124 using android::base::Result;
125 using android::base::unique_fd;
126 using android::net::CacheStatus;
127 using android::net::DnsQueryEvent;
128 using android::net::DnsTlsDispatcher;
129 using android::net::DnsTlsServer;
130 using android::net::DnsTlsTransport;
131 using android::net::Experiments;
132 using android::net::IpVersion;
133 using android::net::IV_IPV4;
134 using android::net::IV_IPV6;
135 using android::net::IV_UNKNOWN;
136 using android::net::LinuxErrno;
137 using android::net::NetworkDnsEventReported;
138 using android::net::NS_T_AAAA;
139 using android::net::NS_T_INVALID;
140 using android::net::NsRcode;
141 using android::net::NsType;
142 using android::net::PrivateDnsConfiguration;
143 using android::net::PrivateDnsMode;
144 using android::net::PrivateDnsStatus;
145 using android::net::PROTO_DOH;
146 using android::net::PROTO_MDNS;
147 using android::net::PROTO_TCP;
148 using android::net::PROTO_UDP;
149 using android::netdutils::IPSockAddr;
150 using android::netdutils::Slice;
151 using android::netdutils::Stopwatch;
152 using std::span;
153 
154 // Order matters: we put IPv6 first to prioritize that.
155 const std::vector<IPSockAddr> mdns_addrs = {IPSockAddr::toIPSockAddr("ff02::fb", 5353),
156                                             IPSockAddr::toIPSockAddr("224.0.0.251", 5353)};
157 
158 static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd_out, int* terrno);
159 static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
160                    int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode);
161 static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
162                    int* terrno, size_t ns, int* rcode);
163 static int send_mdns(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* terrno,
164                      int* rcode, IPSockAddr* receivedMdnsAddr);
165 static void dump_error(const char*, const struct sockaddr*);
166 
167 static int sock_eq(struct sockaddr*, struct sockaddr*);
168 static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
169                                 const struct timespec timeout);
170 static int retrying_poll(const int sock, short events, const struct timespec* finish);
171 static int res_private_dns_send(ResState*, const Slice query, const Slice answer, int* rcode,
172                                 bool* fallback);
173 static int res_tls_send(const std::list<DnsTlsServer>& tlsServers, ResState*, const Slice query,
174                         const Slice answer, int* rcode, PrivateDnsMode mode);
175 static ssize_t res_doh_send(ResState*, const Slice query, const Slice answer, int* rcode);
176 static int elapsedTimeInMs(const timespec& from);
177 
getQueryType(span<const uint8_t> msg)178 NsType getQueryType(span<const uint8_t> msg) {
179     ns_msg handle;
180     ns_rr rr;
181     if (ns_initparse(msg.data(), msg.size(), &handle) < 0 ||
182         ns_parserr(&handle, ns_s_qd, 0, &rr) < 0) {
183         return NS_T_INVALID;
184     }
185     return static_cast<NsType>(ns_rr_type(rr));
186 }
187 
ipFamilyToIPVersion(const int ipFamily)188 IpVersion ipFamilyToIPVersion(const int ipFamily) {
189     switch (ipFamily) {
190         case AF_INET:
191             return IV_IPV4;
192         case AF_INET6:
193             return IV_IPV6;
194         default:
195             return IV_UNKNOWN;
196     }
197 }
198 
199 // BEGIN: Code copied from ISC eventlib
200 // TODO: move away from this code
201 #define BILLION 1000000000
202 
// Builds a timespec holding |sec| seconds and |nsec| nanoseconds. The values are stored as
// given; no normalization is applied.
static struct timespec evConsTime(time_t sec, long nsec) {
    timespec built{};
    built.tv_sec = sec;
    built.tv_nsec = nsec;
    return built;
}
210 
evAddTime(struct timespec addend1,struct timespec addend2)211 static struct timespec evAddTime(struct timespec addend1, struct timespec addend2) {
212     struct timespec x;
213 
214     x.tv_sec = addend1.tv_sec + addend2.tv_sec;
215     x.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
216     if (x.tv_nsec >= BILLION) {
217         x.tv_sec++;
218         x.tv_nsec -= BILLION;
219     }
220     return (x);
221 }
222 
evSubTime(struct timespec minuend,struct timespec subtrahend)223 static struct timespec evSubTime(struct timespec minuend, struct timespec subtrahend) {
224     struct timespec x;
225 
226     x.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
227     if (minuend.tv_nsec >= subtrahend.tv_nsec)
228         x.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
229     else {
230         x.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
231         x.tv_sec--;
232     }
233     return (x);
234 }
235 
// Three-way comparison of two timespec values.
//
// Returns -1 when |a| is earlier than |b|, 1 when |a| is later, and 0 when they are equal.
// Seconds are compared first; nanoseconds only break ties.
//
// The fields are compared directly instead of taking the sign of their difference, so extreme
// tv_sec values cannot overflow, and no helper macro is needed.
static int evCmpTime(struct timespec a, struct timespec b) {
    if (a.tv_sec != b.tv_sec) return (a.tv_sec < b.tv_sec) ? -1 : 1;
    if (a.tv_nsec != b.tv_nsec) return (a.tv_nsec < b.tv_nsec) ? -1 : 1;
    return 0;
}
246 
// Returns the current CLOCK_REALTIME reading. The return value of clock_gettime() is not
// checked.
static struct timespec evNowTime(void) {
    timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    return now;
}
252 
253 // END: Code copied from ISC eventlib
254 
255 /* BIONIC-BEGIN: implement source port randomization */
random_bind(int s,int family)256 static int random_bind(int s, int family) {
257     sockaddr_union u;
258     int j;
259     socklen_t slen;
260 
261     /* clear all, this also sets the IP4/6 address to 'any' */
262     memset(&u, 0, sizeof u);
263 
264     switch (family) {
265         case AF_INET:
266             u.sin.sin_family = family;
267             slen = sizeof u.sin;
268             break;
269         case AF_INET6:
270             u.sin6.sin6_family = family;
271             slen = sizeof u.sin6;
272             break;
273         default:
274             errno = EPROTO;
275             return -1;
276     }
277 
278     /* first try to bind to a random source port a few times */
279     for (j = 0; j < 10; j++) {
280         /* find a random port between 1025 .. 65534 */
281         int port = 1025 + (arc4random_uniform(65535 - 1025));
282         // RFC 6762 section 5.1: Don't use 5353 source port on one-shot Multicast DNS queries. DNS
283         // resolver does not fully compliant mDNS.
284         if (port == 5353) continue;
285 
286         if (family == AF_INET)
287             u.sin.sin_port = htons(port);
288         else
289             u.sin6.sin6_port = htons(port);
290 
291         if (!bind(s, &u.sa, slen)) return 0;
292     }
293 
294     // nothing after 10 attempts, our network table is probably busy
295     // let the system decide which port is best
296     if (family == AF_INET)
297         u.sin.sin_port = 0;
298     else
299         u.sin6.sin6_port = 0;
300 
301     return bind(s, &u.sa, slen);
302 }
303 /* BIONIC-END */
304 
// Leaves only one nameserver enabled in |usable_servers|: the |selectedServer|-th entry
// (counting from 1) among those currently marked usable. Every other entry within the first
// |nscount| slots is cleared.
static void res_set_usable_server(int selectedServer, int nscount, bool usable_servers[]) {
    int usableSeen = 0;  // Number of usable entries encountered so far, including the current one.
    for (int i = 0; i < nscount; ++i) {
        bool& slot = usable_servers[i];
        if (slot) usableSeen += 1;
        // An entry survives only if it is usable and is exactly the selected one.
        slot = slot && (usableSeen == selectedServer);
    }
}
313 
314 // Looks up the nameserver address in res.nsaddrs[], returns the ns number if found, otherwise -1.
res_ourserver_p(ResState * statp,const sockaddr * sa)315 static int res_ourserver_p(ResState* statp, const sockaddr* sa) {
316     const sockaddr_in *inp, *srv;
317     const sockaddr_in6 *in6p, *srv6;
318     int ns = 0;
319     switch (sa->sa_family) {
320         case AF_INET:
321             inp = (const struct sockaddr_in*) (const void*) sa;
322 
323             for (const IPSockAddr& ipsa : statp->nsaddrs) {
324                 sockaddr_storage ss = ipsa;
325                 srv = reinterpret_cast<sockaddr_in*>(&ss);
326                 if (srv->sin_family == inp->sin_family && srv->sin_port == inp->sin_port &&
327                     (srv->sin_addr.s_addr == INADDR_ANY ||
328                      srv->sin_addr.s_addr == inp->sin_addr.s_addr))
329                     return ns;
330                 ++ns;
331             }
332             break;
333         case AF_INET6:
334             in6p = (const struct sockaddr_in6*) (const void*) sa;
335             for (const IPSockAddr& ipsa : statp->nsaddrs) {
336                 sockaddr_storage ss = ipsa;
337                 srv6 = reinterpret_cast<sockaddr_in6*>(&ss);
338                 if (srv6->sin6_family == in6p->sin6_family && srv6->sin6_port == in6p->sin6_port &&
339 #ifdef HAVE_SIN6_SCOPE_ID
340                     (srv6->sin6_scope_id == 0 || srv6->sin6_scope_id == in6p->sin6_scope_id) &&
341 #endif
342                     (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
343                      IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
344                     return ns;
345                 ++ns;
346             }
347             break;
348         default:
349             break;
350     }
351     return -1;
352 }
353 
/* int
 * res_nameinquery(name, type, cl, msg, eom)
 *	look for (name, type, cl) in the query section of packet (msg, eom)
 * parameters:
 *	name     : domain name to look for (compared with ns_samename)
 *	type, cl : record type and class to look for
 *	msg, eom : start of the packet and one past its last byte
 * requires:
 *	msg + HFIXEDSZ <= eom (a packet shorter than the fixed header is
 *	reported as a format error instead of being read)
 * returns:
 *	-1 : format error
 *	0  : not found
 *	>0 : found
 * author:
 *	paul vixie, 29may94
 */
int res_nameinquery(const char* name, int type, int cl, const uint8_t* msg, const uint8_t* eom) {
    // Never touch the header unless the whole fixed-size header is inside the buffer.
    if (eom - msg < HFIXEDSZ) return (-1);

    const uint8_t* cp = msg + HFIXEDSZ;
    int qdcount = ntohs(reinterpret_cast<const HEADER*>(msg)->qdcount);

    while (qdcount-- > 0) {
        char tname[MAXDNAME + 1];
        const int n = dn_expand(msg, eom, cp, tname, sizeof tname);
        if (n < 0) return (-1);
        cp += n;

        // QTYPE and QCLASS (two 16-bit fields) must both fit in what is left of the packet.
        if (eom - cp < 2 * INT16SZ) return (-1);

        // The fields follow a variable-length name, so |cp| has no particular alignment. Decode
        // the big-endian values byte by byte rather than loading through a uint16_t pointer.
        const int ttype = (cp[0] << 8) | cp[1];
        const int tclass = (cp[2] << 8) | cp[3];
        cp += 2 * INT16SZ;

        if (ttype == type && tclass == cl && ns_samename(tname, name) == 1) return (1);
    }
    return (0);
}
384 
/* int
 * res_queriesmatch(buf1, eom1, buf2, eom2)
 *	is there a 1:1 mapping of (name,type,class)
 *	in (buf1,eom1) and (buf2,eom2)?
 * parameters:
 *	buf1, eom1 : start of the first packet and one past its last byte
 *	buf2, eom2 : start of the second packet and one past its last byte
 * returns:
 *	-1 : format error (in either packet)
 *	0  : not a 1:1 mapping
 *	>0 : is a 1:1 mapping
 * author:
 *	paul vixie, 29may94
 */
int res_queriesmatch(const uint8_t* buf1, const uint8_t* eom1, const uint8_t* buf2,
                     const uint8_t* eom2) {
    // Validate both header lengths before any header field is read.
    if (eom1 - buf1 < HFIXEDSZ || eom2 - buf2 < HFIXEDSZ) return (-1);

    const auto* header1 = reinterpret_cast<const HEADER*>(buf1);
    const auto* header2 = reinterpret_cast<const HEADER*>(buf2);

    /*
     * Only header section present in replies to
     * dynamic update packets.
     */
    if (header1->opcode == ns_o_update && header2->opcode == ns_o_update) return (1);

    int qdcount = ntohs(header1->qdcount);
    if (qdcount != ntohs(header2->qdcount)) return (0);

    const uint8_t* cp = buf1 + HFIXEDSZ;
    while (qdcount-- > 0) {
        char tname[MAXDNAME + 1];
        const int n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
        if (n < 0) return (-1);
        cp += n;

        // QTYPE and QCLASS (two 16-bit fields) must both fit in what is left of the packet.
        if (eom1 - cp < 2 * INT16SZ) return (-1);

        // |cp| follows a variable-length name and may be misaligned, so the big-endian fields
        // are decoded byte by byte.
        const int ttype = (cp[0] << 8) | cp[1];
        const int tclass = (cp[2] << 8) | cp[3];
        cp += 2 * INT16SZ;

        // res_nameinquery() has three outcomes. A format error in the second packet is reported
        // as a format error here; it must not be mistaken for "found".
        const int found = res_nameinquery(tname, ttype, tclass, buf2, eom2);
        if (found < 0) return (-1);
        if (found == 0) return (0);
    }
    return (1);
}
426 
addDnsQueryEvent(NetworkDnsEventReported * event)427 static DnsQueryEvent* addDnsQueryEvent(NetworkDnsEventReported* event) {
428     return event->mutable_dns_query_events()->add_dns_query_event();
429 }
430 
// Tells whether |terrno| indicates that sending was blocked by a network restricted mode.
//
// The system may be in a network restricted mode that blocks sending packets, which shows up
// as EPERM. There is no reason to keep retrying in that case.
// TODO: Check the system status to know if network restricted mode is enabled.
static bool isNetworkRestricted(int terrno) {
    switch (terrno) {
        case EPERM:
            return true;
        default:
            return false;
    }
}
439 
isClientStreamSocketClosed(std::optional<int> fd)440 static bool isClientStreamSocketClosed(std::optional<int> fd) {
441     if (!fd.has_value()) return false;
442     if (!android::net::Experiments::getInstance()->getFlag("no_retry_after_cancel", 0)) {
443         return false;
444     }
445     struct pollfd fds{
446             // POLLHUP is always included in events but is specified explicitly here
447             .fd = fd.value(),
448             .events = POLLHUP,
449     };
450     return (poll(&fds, 1, /* timeout=*/0) > 0) && (fds.revents & POLLHUP);
451 }
452 
res_nsend(ResState * statp,span<const uint8_t> msg,span<uint8_t> ans,int * rcode,uint32_t flags,std::chrono::milliseconds sleepTimeMs)453 int res_nsend(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* rcode,
454               uint32_t flags, std::chrono::milliseconds sleepTimeMs) {
455     LOG(DEBUG) << __func__;
456 
457     // Should not happen
458     if (ans.size() < HFIXEDSZ) {
459         // TODO: Remove errno once callers stop using it
460         errno = EINVAL;
461         return -EINVAL;
462     }
463     res_pquery(msg);
464 
465     // TODO(b/394031336): Implement caching for mDNS.
466     // The DNS cache keys by netid, ignoring the interface queries get routed towards. This is fine
467     // for unicast DNS requests, where:
468     // 1. We always send a single query per type (i.e., even if a network has multiple interfaces,
469     // we will send a single A and AAAA query).
470     // 2. Results cannot be IPv6 link-local addresses
471     // It is not fine for mDNS, where both things can happen.
472     if (isMdnsResolution(statp->flags)) {
473         // Use an impossible error code as default value.
474         int terrno = ETIME;
475         int resplen = 0;
476         *rcode = RCODE_INTERNAL_ERROR;
477         Stopwatch queryStopwatch;
478         IPSockAddr receivedMdnsAddr;
479         resplen = send_mdns(statp, msg, ans, &terrno, rcode, &receivedMdnsAddr);
480         DnsQueryEvent* mDnsQueryEvent = addDnsQueryEvent(statp->event);
481         mDnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(RESOLV_CACHE_NOTFOUND));
482         mDnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
483         mDnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedMdnsAddr.family()));
484         mDnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
485         mDnsQueryEvent->set_protocol(PROTO_MDNS);
486         mDnsQueryEvent->set_type(getQueryType(msg));
487         mDnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));
488         resolv_stats_add(statp->netid, receivedMdnsAddr, mDnsQueryEvent);
489 
490         if (resplen > 0) {
491             LOG(DEBUG) << __func__ << ": got answer from mDNS:";
492             res_pquery(ans.first(resplen));
493             return resplen;
494         }
495     }
496 
497     int anslen = 0;
498     Stopwatch cacheStopwatch;
499     ResolvCacheStatus cache_status = resolv_cache_lookup(statp->netid, msg, ans, &anslen, flags);
500     const int32_t cacheLatencyUs = saturate_cast<int32_t>(cacheStopwatch.timeTakenUs());
501     if (cache_status == RESOLV_CACHE_FOUND) {
502         HEADER* hp = (HEADER*)(void*)ans.data();
503         *rcode = hp->rcode;
504         DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
505         dnsQueryEvent->set_latency_micros(cacheLatencyUs);
506         dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
507         dnsQueryEvent->set_type(getQueryType(msg));
508         return anslen;
509     } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
510         // had a cache miss for a known network, so populate the thread private
511         // data so the normal resolve path can do its thing
512         resolv_populate_res_for_net(statp);
513     }
514 
515     if (statp->nameserverCount() == 0) {
516         // We have no nameservers configured and it's not a MDNS resolution, so there's no
517         // point trying. Tell the cache the query failed, or any retries and anyone else
518         // asking the same question will block for PENDING_REQUEST_TIMEOUT seconds instead
519         // of failing fast.
520         _resolv_cache_query_failed(statp->netid, msg, flags);
521         LOG(DEBUG) << __func__ << ": no nameserver";
522         // TODO: Remove errno once callers stop using it
523         errno = ESRCH;
524         return -ESRCH;
525     }
526 
527     // Private DNS
528     if (!(statp->netcontext_flags & NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS)) {
529         bool fallback = false;
530         int resplen =
531                 res_private_dns_send(statp, Slice(const_cast<uint8_t*>(msg.data()), msg.size()),
532                                      Slice(ans.data(), ans.size()), rcode, &fallback);
533         if (resplen > 0) {
534             LOG(DEBUG) << __func__ << ": got answer from Private DNS";
535             res_pquery(ans.first(resplen));
536             if (cache_status == RESOLV_CACHE_NOTFOUND) {
537                 resolv_cache_add(statp->netid, msg, ans.first(resplen));
538             }
539             return resplen;
540         }
541         if (!fallback) {
542             _resolv_cache_query_failed(statp->netid, msg, flags);
543             LOG(DEBUG) << __func__ << ": private DNS failed";
544             return -ETIMEDOUT;
545         }
546     }
547 
548     // If parallel_lookup is enabled, it might be required to wait some time to avoid
549     // gateways from dropping packets if queries are sent too close together.
550     if (sleepTimeMs != 0ms) {
551         std::this_thread::sleep_for(sleepTimeMs);
552     }
553 
554     res_stats stats[MAXNS]{};
555     res_params params;
556     int revision_id = resolv_cache_get_resolver_stats(statp->netid, &params, stats, statp->nsaddrs);
557     if (revision_id < 0) {
558         LOG(ERROR) << __func__ << ": revision_id < 0";
559         // TODO: Remove errno once callers stop using it
560         errno = ESRCH;
561         return -ESRCH;
562     }
563 
564     bool usable_servers[MAXNS];
565     int usableServersCount = android_net_res_stats_get_usable_servers(
566             &params, stats, statp->nameserverCount(), usable_servers);
567 
568     if (statp->sort_nameservers) {
569         // It's unnecessary to mark a DNS server as unusable since broken servers will be less
570         // likely to be chosen.
571         for (int i = 0; i < statp->nameserverCount(); i++) {
572             usable_servers[i] = true;
573         }
574     }
575 
576     // TODO: Let it always choose the first nameserver when sort_nameservers is enabled.
577     if ((flags & ANDROID_RESOLV_NO_RETRY) && usableServersCount > 1) {
578         auto hp = reinterpret_cast<const HEADER*>(msg.data());
579 
580         // Select a random server based on the query id
581         int selectedServer = (hp->id % usableServersCount) + 1;
582         res_set_usable_server(selectedServer, statp->nameserverCount(), usable_servers);
583     }
584 
585     // Send request, RETRY times, or until successful.
586     int retryTimes = (flags & ANDROID_RESOLV_NO_RETRY) ? 1 : params.retry_count;
587     int useTcp = msg.size() > PACKETSZ;
588     int gotsomewhere = 0;
589 
590     // Use an impossible error code as default value
591     int terrno = ETIME;
592     // plaintext DNS
593     for (int attempt = 0; attempt < retryTimes; ++attempt) {
594         if (attempt > 0 && isClientStreamSocketClosed(statp->app_socket)) {
595             // Stop retrying if the remote end is not listening for answers anymore. Only do that
596             // for retries and not the initial query to minimize latency (although the check is very
597             // cheap) in the vast majority of cases where queries are not immediately cancelled, and
598             // to make testing easier so tests can cancel immediately and reliably expect one query.
599             // This could also cancel before the first attempt if private DNS was already tried and
600             // this is a fallback, but this is not done here for simplicity.
601             break;
602         }
603         for (size_t ns = 0; ns < statp->nsaddrs.size(); ++ns) {
604             if (!usable_servers[ns]) continue;
605 
606             *rcode = RCODE_INTERNAL_ERROR;
607             LOG(DEBUG) << __func__ << ": Querying server (# " << ns + 1
608                        << ") address = " << statp->nsaddrs[ns].toString();
609 
610             ::android::net::Protocol query_proto = useTcp ? PROTO_TCP : PROTO_UDP;
611             const time_t query_time = time(nullptr);
612             int delay = 0;
613             bool fallbackTCP = false;
614             const bool shouldRecordStats = (attempt == 0);
615             int resplen;
616             Stopwatch queryStopwatch;
617             int retry_count_for_event = 0;
618             size_t actualNs = ns;
619             // Use an impossible error code as default value
620             terrno = ETIME;
621             if (useTcp) {
622                 // TCP; at most one attempt per server.
623                 attempt = retryTimes;
624                 resplen = send_vc(statp, &params, msg, ans, &terrno, ns, rcode);
625                 delay = elapsedTimeInMs(statp->tcp_nssock_ts);
626 
627                 if (msg.size() <= PACKETSZ && resplen <= 0 &&
628                     statp->tc_mode == aidl::android::net::IDnsResolver::TC_MODE_UDP_TCP) {
629                     // reset to UDP for next query on next DNS server if resolver is currently doing
630                     // TCP fallback retry and current server does not support TCP connectin
631                     useTcp = false;
632                 }
633                 LOG(INFO) << __func__ << ": used send_vc " << resplen << " terrno: " << terrno;
634             } else {
635                 // UDP
636                 resplen = send_dg(statp, &params, msg, ans, &terrno, &actualNs, &useTcp,
637                                   &gotsomewhere, rcode);
638                 delay = elapsedTimeInMs(statp->udpsocks_ts[actualNs]);
639                 fallbackTCP = useTcp ? true : false;
640                 retry_count_for_event = attempt;
641                 LOG(INFO) << __func__ << ": used send_dg " << resplen << " terrno: " << terrno;
642             }
643 
644             const IPSockAddr& receivedServerAddr = statp->nsaddrs[actualNs];
645             DnsQueryEvent* dnsQueryEvent = addDnsQueryEvent(statp->event);
646             dnsQueryEvent->set_cache_hit(static_cast<CacheStatus>(cache_status));
647             // When |retryTimes| > 1, we cannot actually know the correct latency value if we
648             // received the answer from the previous server. So temporarily set the latency as -1 if
649             // that condition happened.
650             // TODO: make the latency value accurate.
651             dnsQueryEvent->set_latency_micros(
652                     (actualNs == ns) ? saturate_cast<int32_t>(queryStopwatch.timeTakenUs()) : -1);
653             dnsQueryEvent->set_dns_server_index(actualNs);
654             dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(receivedServerAddr.family()));
655             dnsQueryEvent->set_retry_times(retry_count_for_event);
656             dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
657             dnsQueryEvent->set_protocol(query_proto);
658             dnsQueryEvent->set_type(getQueryType(msg));
659             dnsQueryEvent->set_linux_errno(static_cast<LinuxErrno>(terrno));
660 
661             // Only record stats the first time we try a query. This ensures that
662             // queries that deterministically fail (e.g., a name that always returns
663             // SERVFAIL or times out) do not unduly affect the stats.
664             if (shouldRecordStats) {
665                 // (b/151166599): This is a workaround to prevent that DnsResolver calculates the
666                 // reliability of DNS servers from being broken when network restricted mode is
667                 // enabled.
668                 // TODO: Introduce the new server selection instead of skipping stats recording.
669                 if (!isNetworkRestricted(terrno)) {
670                     res_sample sample;
671                     res_stats_set_sample(&sample, query_time, *rcode, delay);
672                     resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
673                                                            receivedServerAddr, sample,
674                                                            params.max_samples);
675                     resolv_stats_add(statp->netid, receivedServerAddr, dnsQueryEvent);
676                 }
677             }
678 
679             if (resplen == 0) continue;
680             if (fallbackTCP) {
681                 ns--;
682                 continue;
683             }
684             if (resplen < 0) {
685                 _resolv_cache_query_failed(statp->netid, msg, flags);
686                 statp->closeSockets();
687                 return -terrno;
688             }
689 
690             LOG(DEBUG) << __func__ << ": got answer:";
691             res_pquery(ans.first(resplen));
692 
693             if (cache_status == RESOLV_CACHE_NOTFOUND) {
694                 resolv_cache_add(statp->netid, msg, std::span(ans.data(), resplen));
695             }
696             statp->closeSockets();
697             return (resplen);
698         }  // for each ns
699     }  // for each retry
700     statp->closeSockets();
701     terrno = useTcp ? terrno : gotsomewhere ? ETIMEDOUT : ECONNREFUSED;
702     // TODO: Remove errno once callers stop using it
703     errno = useTcp ? terrno
704                    : gotsomewhere ? ETIMEDOUT /* no answer obtained */
705                                   : ECONNREFUSED /* no nameservers found */;
706 
707     _resolv_cache_query_failed(statp->netid, msg, flags);
708     return -terrno;
709 }
710 
get_timeout(ResState * statp,const res_params * params,const int addrIndex)711 static struct timespec get_timeout(ResState* statp, const res_params* params, const int addrIndex) {
712     int msec;
713     msec = params->base_timeout_msec << addrIndex;
714     // Legacy algorithm which scales the timeout by nameserver number.
715     // For instance, with 4 nameservers: 5s, 2.5s, 5s, 10s
716     // This has no effect with 1 or 2 nameservers
717     if (addrIndex > 0) {
718         msec /= statp->nameserverCount();
719     }
720     // For safety, don't allow OEMs and experiments to configure a timeout shorter than 1s.
721     if (msec < 1000) {
722         msec = 1000;  // Use at least 1000ms
723     }
724     LOG(DEBUG) << __func__ << ": using timeout of " << msec << " msec";
725 
726     struct timespec result;
727     result.tv_sec = msec / 1000;
728     result.tv_nsec = (msec % 1000) * 1000000;
729     return result;
730 }
731 
send_vc(ResState * statp,res_params * params,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,size_t ns,int * rcode)732 static int send_vc(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
733                    int* terrno, size_t ns, int* rcode) {
734     const HEADER* hp = (const HEADER*)(const void*)msg.data();
735     HEADER* anhp = (HEADER*)(void*)ans.data();
736     struct sockaddr* nsap;
737     int nsaplen;
738     int truncating, connreset, n;
739     uint8_t* cp;
740 
741     LOG(DEBUG) << __func__ << ": using send_vc";
742 
743     // It should never happen, but just in case.
744     if (ns >= statp->nsaddrs.size()) {
745         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
746         *terrno = EINVAL;
747         return -1;
748     }
749 
750     sockaddr_storage ss = statp->nsaddrs[ns];
751     nsap = reinterpret_cast<sockaddr*>(&ss);
752     nsaplen = sockaddrSize(nsap);
753 
754     connreset = 0;
755 same_ns:
756     truncating = 0;
757 
758     /* Are we still talking to whom we want to talk to? */
759     if (statp->tcp_nssock >= 0 && (statp->flags & RES_F_VC) != 0) {
760         struct sockaddr_storage peer;
761         socklen_t size = sizeof peer;
762         unsigned old_mark;
763         socklen_t mark_size = sizeof(old_mark);
764         if (getpeername(statp->tcp_nssock, (struct sockaddr*)(void*)&peer, &size) < 0 ||
765             !sock_eq((struct sockaddr*)(void*)&peer, nsap) ||
766             getsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &old_mark, &mark_size) < 0 ||
767             old_mark != statp->mark) {
768             statp->closeSockets();
769         }
770     }
771 
772     if (statp->tcp_nssock < 0 || (statp->flags & RES_F_VC) == 0) {
773         if (statp->tcp_nssock >= 0) statp->closeSockets();
774 
775         statp->tcp_nssock.reset(socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0));
776         if (statp->tcp_nssock < 0) {
777             *terrno = errno;
778             PLOG(DEBUG) << __func__ << ": socket(vc): ";
779             switch (errno) {
780                 case EPROTONOSUPPORT:
781                 case EPFNOSUPPORT:
782                 case EAFNOSUPPORT:
783                     return 0;
784                 default:
785                     return -1;
786             }
787         }
788         statp->tcp_nssock_ts = evNowTime();
789         const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
790         resolv_tag_socket(statp->tcp_nssock, uid, statp->pid);
791         if (statp->mark != MARK_UNSET) {
792             if (setsockopt(statp->tcp_nssock, SOL_SOCKET, SO_MARK, &statp->mark,
793                            sizeof(statp->mark)) < 0) {
794                 *terrno = errno;
795                 PLOG(DEBUG) << __func__ << ": setsockopt: ";
796                 return -1;
797             }
798         }
799         errno = 0;
800         if (random_bind(statp->tcp_nssock, nsap->sa_family) < 0) {
801             *terrno = errno;
802             dump_error("bind/vc", nsap);
803             statp->closeSockets();
804             return (0);
805         }
806         if (connect_with_timeout(statp->tcp_nssock, nsap, (socklen_t)nsaplen,
807                                  get_timeout(statp, params, ns)) < 0) {
808             *terrno = errno;
809             dump_error("connect/vc", nsap);
810             statp->closeSockets();
811             /*
812              * The way connect_with_timeout() is implemented prevents us from reliably
813              * determining whether this was really a timeout or e.g. ECONNREFUSED. Since
814              * currently both cases are handled in the same way, there is no need to
815              * change this (yet). If we ever need to reliably distinguish between these
816              * cases, both connect_with_timeout() and retrying_poll() need to be
817              * modified, though.
818              */
819             *rcode = RCODE_TIMEOUT;
820             return (0);
821         }
822         statp->flags |= RES_F_VC;
823     }
824 
825     /*
826      * Send length & message
827      */
828     uint16_t len = htons(static_cast<uint16_t>(msg.size()));
829     const iovec iov[] = {
830             {.iov_base = &len, .iov_len = INT16SZ},
831             {.iov_base = const_cast<uint8_t*>(msg.data()),
832              .iov_len = static_cast<size_t>(msg.size())},
833     };
834     if (writev(statp->tcp_nssock, iov, 2) != static_cast<ptrdiff_t>(INT16SZ + msg.size())) {
835         *terrno = errno;
836         PLOG(DEBUG) << __func__ << ": write failed: ";
837         statp->closeSockets();
838         return (0);
839     }
840     /*
841      * Receive length & response
842      */
843 read_len:
844     cp = ans.data();
845     len = INT16SZ;
846     while ((n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
847         cp += n;
848         if ((len -= n) == 0) break;
849     }
850     if (n <= 0) {
851         *terrno = errno;
852         PLOG(DEBUG) << __func__ << ": read failed: ";
853         statp->closeSockets();
854         /*
855          * A long running process might get its TCP
856          * connection reset if the remote server was
857          * restarted.  Requery the server instead of
858          * trying a new one.  When there is only one
859          * server, this means that a query might work
860          * instead of failing.  We only allow one reset
861          * per query to prevent looping.
862          */
863         if (*terrno == ECONNRESET && !connreset) {
864             connreset = 1;
865             goto same_ns;
866         }
867         return (0);
868     }
869     uint16_t resplen = ntohs(*reinterpret_cast<const uint16_t*>(ans.data()));
870     if (resplen > ans.size()) {
871         LOG(DEBUG) << __func__ << ": response truncated";
872         truncating = 1;
873         len = ans.size();
874     } else
875         len = resplen;
876     if (len < HFIXEDSZ) {
877         /*
878          * Undersized message.
879          */
880         LOG(DEBUG) << __func__ << ": undersized: " << len;
881         *terrno = EMSGSIZE;
882         statp->closeSockets();
883         return (0);
884     }
885     cp = ans.data();
886     while (len != 0 && (n = read(statp->tcp_nssock, (char*)cp, (size_t)len)) > 0) {
887         cp += n;
888         len -= n;
889     }
890     if (n <= 0) {
891         *terrno = errno;
892         PLOG(DEBUG) << __func__ << ": read(vc): ";
893         statp->closeSockets();
894         return (0);
895     }
896 
897     if (truncating) {
898         /*
899          * Flush rest of answer so connection stays in synch.
900          */
901         anhp->tc = 1;
902         len = resplen - ans.size();
903         while (len != 0) {
904             char junk[PACKETSZ];
905 
906             n = read(statp->tcp_nssock, junk, (len > sizeof junk) ? sizeof junk : len);
907             if (n > 0)
908                 len -= n;
909             else
910                 break;
911         }
912         LOG(WARNING) << __func__ << ": resplen " << resplen << " exceeds buf size " << ans.size();
913         // return size should never exceed container size
914         resplen = ans.size();
915     }
916     /*
917      * If the calling application has bailed out of
918      * a previous call and failed to arrange to have
919      * the circuit closed or the server has got
920      * itself confused, then drop the packet and
921      * wait for the correct one.
922      */
923     if (hp->id != anhp->id) {
924         LOG(DEBUG) << __func__ << ": ld answer (unexpected):";
925         res_pquery({ans.data(), resplen});
926         goto read_len;
927     }
928 
929     /*
930      * All is well, or the error is fatal.  Signal that the
931      * next nameserver ought not be tried.
932      */
933     if (resplen > 0) {
934         *rcode = anhp->rcode;
935     }
936     *terrno = 0;
937     return (resplen);
938 }
939 
940 /* return -1 on error (errno set), 0 on success */
connect_with_timeout(int sock,const sockaddr * nsap,socklen_t salen,const timespec timeout)941 static int connect_with_timeout(int sock, const sockaddr* nsap, socklen_t salen,
942                                 const timespec timeout) {
943     int res, origflags;
944 
945     origflags = fcntl(sock, F_GETFL, 0);
946     fcntl(sock, F_SETFL, origflags | O_NONBLOCK);
947 
948     res = connect(sock, nsap, salen);
949     if (res < 0 && errno != EINPROGRESS) {
950         res = -1;
951         goto done;
952     }
953     if (res != 0) {
954         timespec now = evNowTime();
955         timespec finish = evAddTime(now, timeout);
956         LOG(DEBUG) << __func__ << ": " << sock << " send_vc";
957         res = retrying_poll(sock, POLLIN | POLLOUT, &finish);
958         if (res <= 0) {
959             res = -1;
960         }
961     }
962 done:
963     fcntl(sock, F_SETFL, origflags);
964     LOG(INFO) << __func__ << ": " << sock << " connect_with_const timeout returning " << res;
965     return res;
966 }
967 
retrying_poll(const int sock,const short events,const struct timespec * finish)968 static int retrying_poll(const int sock, const short events, const struct timespec* finish) {
969     struct timespec now, timeout;
970 
971 retry:
972     LOG(DEBUG) << __func__ << ": " << sock << " retrying_poll";
973 
974     now = evNowTime();
975     if (evCmpTime(*finish, now) > 0)
976         timeout = evSubTime(*finish, now);
977     else
978         timeout = evConsTime(0L, 0L);
979     struct pollfd fds = {.fd = sock, .events = events};
980     int n = ppoll(&fds, 1, &timeout, /*__mask=*/NULL);
981     if (n == 0) {
982         LOG(DEBUG) << __func__ << ": " << sock << " retrying_poll timeout";
983         errno = ETIMEDOUT;
984         return 0;
985     }
986     if (n < 0) {
987         if (errno == EINTR) goto retry;
988         PLOG(INFO) << __func__ << ": " << sock << " retrying_poll failed";
989         return n;
990     }
991     if (fds.revents & (POLLIN | POLLOUT | POLLERR)) {
992         int error;
993         socklen_t len = sizeof(error);
994         if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0 || error) {
995             errno = error;
996             PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
997             return -1;
998         }
999     }
1000     LOG(DEBUG) << __func__ << ": " << sock << " retrying_poll returning " << n;
1001     return n;
1002 }
1003 
extractUdpFdset(ResState * statp,const short events=POLLIN)1004 static std::vector<pollfd> extractUdpFdset(ResState* statp, const short events = POLLIN) {
1005     std::vector<pollfd> fdset(statp->nsaddrs.size());
1006     for (size_t i = 0; i < statp->nsaddrs.size(); ++i) {
1007         fdset[i] = {.fd = statp->udpsocks[i], .events = events};
1008     }
1009     return fdset;
1010 }
1011 
udpRetryingPoll(ResState * statp,const timespec * finish)1012 static Result<std::vector<int>> udpRetryingPoll(ResState* statp, const timespec* finish) {
1013     for (;;) {
1014         LOG(DEBUG) << __func__ << ": poll";
1015         timespec start_time = evNowTime();
1016         timespec timeout = (evCmpTime(*finish, start_time) > 0) ? evSubTime(*finish, start_time)
1017                                                                 : evConsTime(0L, 0L);
1018         std::vector<pollfd> fdset = extractUdpFdset(statp);
1019         const int n = ppoll(fdset.data(), fdset.size(), &timeout, /*__mask=*/nullptr);
1020         if (n <= 0) {
1021             if (errno == EINTR && n < 0) continue;
1022             if (n == 0) errno = ETIMEDOUT;
1023             PLOG(INFO) << __func__ << ": failed";
1024             return ErrnoError();
1025         }
1026         std::vector<int> fdsToRead;
1027         for (const auto& pollfd : fdset) {
1028             if (pollfd.revents & (POLLIN | POLLERR)) {
1029                 fdsToRead.push_back(pollfd.fd);
1030             }
1031         }
1032         LOG(DEBUG) << __func__ << ": "
1033                    << " returning fd size: " << fdsToRead.size();
1034         return fdsToRead;
1035     }
1036 }
1037 
udpRetryingPollWrapper(ResState * statp,int addrInfo,const timespec * finish)1038 static Result<std::vector<int>> udpRetryingPollWrapper(ResState* statp, int addrInfo,
1039                                                        const timespec* finish) {
1040     const bool keepListeningUdp =
1041             android::net::Experiments::getInstance()->getFlag("keep_listening_udp", 0);
1042     if (keepListeningUdp) return udpRetryingPoll(statp, finish);
1043 
1044     if (int n = retrying_poll(statp->udpsocks[addrInfo], POLLIN, finish); n <= 0) {
1045         return ErrnoError();
1046     }
1047     return std::vector<int>{statp->udpsocks[addrInfo]};
1048 }
1049 
ignoreInvalidAnswer(ResState * statp,const sockaddr_storage & from,span<const uint8_t> msg,span<uint8_t> ans,int * receivedFromNs)1050 bool ignoreInvalidAnswer(ResState* statp, const sockaddr_storage& from, span<const uint8_t> msg,
1051                          span<uint8_t> ans, int* receivedFromNs) {
1052     const HEADER* hp = (const HEADER*)(const void*)msg.data();
1053     HEADER* anhp = (HEADER*)(void*)ans.data();
1054     if (hp->id != anhp->id) {
1055         // response from old query, ignore it.
1056         LOG(DEBUG) << __func__ << ": old answer:";
1057         return true;
1058     }
1059     if (*receivedFromNs = res_ourserver_p(statp, (sockaddr*)(void*)&from); *receivedFromNs < 0) {
1060         // response from wrong server? ignore it.
1061         LOG(DEBUG) << __func__ << ": not our server:";
1062         return true;
1063     }
1064     if (!res_queriesmatch(msg.data(), msg.data() + msg.size(), ans.data(),
1065                           ans.data() + ans.size())) {
1066         // response contains wrong query? ignore it.
1067         LOG(DEBUG) << __func__ << ": wrong query name:";
1068         return true;
1069     }
1070     return false;
1071 }
1072 
1073 // return  1 - setup udp socket success.
1074 // return  0 - bind error, protocol error.
1075 // return -1 - create socket fail, except |EPROTONOSUPPORT| EPFNOSUPPORT |EAFNOSUPPORT|.
1076 //             set socket option fail.
setupUdpSocket(ResState * statp,const sockaddr * sockap,unique_fd * fd_out,int * terrno)1077 static int setupUdpSocket(ResState* statp, const sockaddr* sockap, unique_fd* fd_out, int* terrno) {
1078     fd_out->reset(socket(sockap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0));
1079 
1080     if (*fd_out < 0) {
1081         *terrno = errno;
1082         PLOG(ERROR) << __func__ << ": socket: ";
1083         switch (errno) {
1084             case EPROTONOSUPPORT:
1085             case EPFNOSUPPORT:
1086             case EAFNOSUPPORT:
1087                 return 0;
1088             default:
1089                 return -1;
1090         }
1091     }
1092     const uid_t uid = statp->enforce_dns_uid ? AID_DNS : statp->uid;
1093     resolv_tag_socket(*fd_out, uid, statp->pid);
1094     if (statp->mark != MARK_UNSET) {
1095         if (setsockopt(*fd_out, SOL_SOCKET, SO_MARK, &(statp->mark), sizeof(statp->mark)) < 0) {
1096             *terrno = errno;
1097             return -1;
1098         }
1099     }
1100 
1101     if (random_bind(*fd_out, sockap->sa_family) < 0) {
1102         *terrno = errno;
1103         dump_error("bind", sockap);
1104         return 0;
1105     }
1106     return 1;
1107 }
1108 
send_dg(ResState * statp,res_params * params,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,size_t * ns,int * v_circuit,int * gotsomewhere,int * rcode)1109 static int send_dg(ResState* statp, res_params* params, span<const uint8_t> msg, span<uint8_t> ans,
1110                    int* terrno, size_t* ns, int* v_circuit, int* gotsomewhere, int* rcode) {
1111     // It should never happen, but just in case.
1112     if (*ns >= statp->nsaddrs.size()) {
1113         LOG(ERROR) << __func__ << ": Out-of-bound indexing: " << ns;
1114         *terrno = EINVAL;
1115         return -1;
1116     }
1117 
1118     const sockaddr_storage ss = statp->nsaddrs[*ns];
1119     const sockaddr* nsap = reinterpret_cast<const sockaddr*>(&ss);
1120 
1121     if (statp->udpsocks[*ns] == -1) {
1122         int result = setupUdpSocket(statp, nsap, &statp->udpsocks[*ns], terrno);
1123         if (result <= 0) return result;
1124         statp->udpsocks_ts[*ns] = evNowTime();
1125 
1126         // Use a "connected" datagram socket to receive an ECONNREFUSED error
1127         // on the next socket operation when the server responds with an
1128         // ICMP port-unreachable error. This way we can detect the absence of
1129         // a nameserver without timing out.
1130         if (connect(statp->udpsocks[*ns], nsap, sockaddrSize(nsap)) < 0) {
1131             *terrno = errno;
1132             dump_error("connect(dg)", nsap);
1133             statp->closeSockets();
1134             return 0;
1135         }
1136         LOG(DEBUG) << __func__ << ": new DG socket";
1137     }
1138     if (send(statp->udpsocks[*ns], msg.data(), msg.size(), 0) !=
1139         static_cast<ptrdiff_t>(msg.size())) {
1140         *terrno = errno;
1141         PLOG(DEBUG) << __func__ << ": send: ";
1142         statp->closeSockets();
1143         return 0;
1144     }
1145 
1146     timespec timeout = get_timeout(statp, params, *ns);
1147     timespec start_time = evNowTime();
1148     timespec finish = evAddTime(start_time, timeout);
1149     for (;;) {
1150         // Wait for reply.
1151         auto result = udpRetryingPollWrapper(statp, *ns, &finish);
1152 
1153         if (!result.has_value()) {
1154             const bool isTimeout = (result.error().code() == ETIMEDOUT);
1155             *rcode = (isTimeout) ? RCODE_TIMEOUT : *rcode;
1156             *terrno = (isTimeout) ? ETIMEDOUT : errno;
1157             *gotsomewhere = (isTimeout) ? 1 : *gotsomewhere;
1158             // Leave the UDP sockets open on timeout so we can keep listening for
1159             // a late response from this server while retrying on the next server.
1160             if (!isTimeout) statp->closeSockets();
1161             LOG(DEBUG) << __func__ << ": " << (isTimeout ? "timeout" : "poll");
1162             return 0;
1163         }
1164         bool needRetry = false;
1165         for (int fd : result.value()) {
1166             needRetry = false;
1167             sockaddr_storage from;
1168             socklen_t fromlen = sizeof(from);
1169             int resplen =
1170                     recvfrom(fd, ans.data(), ans.size(), 0, (sockaddr*)(void*)&from, &fromlen);
1171             if (resplen <= 0) {
1172                 *terrno = errno;
1173                 PLOG(DEBUG) << __func__ << ": recvfrom: ";
1174                 continue;
1175             }
1176             *gotsomewhere = 1;
1177             if (resplen < HFIXEDSZ) {
1178                 // Undersized message.
1179                 LOG(DEBUG) << __func__ << ": undersized: " << resplen;
1180                 *terrno = EMSGSIZE;
1181                 continue;
1182             }
1183             if (resplen > static_cast<ptrdiff_t>(ans.size())) {
1184                 LOG(FATAL) << __func__ << ": invalid resplen (too large): " << resplen;
1185             }
1186 
1187             int receivedFromNs = *ns;
1188             if (needRetry = ignoreInvalidAnswer(statp, from, msg, ans, &receivedFromNs);
1189                 needRetry) {
1190                 res_pquery(ans.first(resplen));
1191                 continue;
1192             }
1193 
1194             HEADER* anhp = (HEADER*)(void*)ans.data();
1195             if (anhp->rcode == FORMERR && (statp->netcontext_flags & NET_CONTEXT_FLAG_USE_EDNS)) {
1196                 //  Do not retry if the server do not understand EDNS0.
1197                 //  The case has to be captured here, as FORMERR packet do not
1198                 //  carry query section, hence res_queriesmatch() returns 0.
1199                 LOG(DEBUG) << __func__ << ": server rejected query with EDNS0:";
1200                 res_pquery(ans.first(resplen));
1201                 // record the error
1202                 statp->flags |= RES_F_EDNS0ERR;
1203                 *terrno = EREMOTEIO;
1204                 continue;
1205             }
1206 
1207             if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
1208                 LOG(DEBUG) << __func__ << ": server rejected query:";
1209                 res_pquery(ans.first(resplen));
1210                 *rcode = anhp->rcode;
1211                 continue;
1212             }
1213             if (anhp->tc) {
1214                 // To get the rest of answer,
1215                 // use TCP with same server.
1216                 LOG(DEBUG) << __func__ << ": truncated answer";
1217                 *terrno = E2BIG;
1218                 *v_circuit = 1;
1219                 return 1;
1220             }
1221             // All is well, or the error is fatal. Signal that the
1222             // next nameserver ought not be tried.
1223 
1224             *rcode = anhp->rcode;
1225             *ns = receivedFromNs;
1226             *terrno = 0;
1227             return resplen;
1228         }
1229         if (!needRetry) return 0;
1230     }
1231 }
1232 
1233 // return length - when receiving valid packets.
1234 // return 0      - when mdns packets transfer error.
send_mdns(ResState * statp,span<const uint8_t> msg,span<uint8_t> ans,int * terrno,int * rcode,IPSockAddr * receivedMdnsAddr)1235 static int send_mdns(ResState* statp, span<const uint8_t> msg, span<uint8_t> ans, int* terrno,
1236                      int* rcode, IPSockAddr* receivedMdnsAddr) {
1237     for (const auto& mdns_addr : mdns_addrs) {
1238         const sockaddr_storage ss = mdns_addr;
1239         *receivedMdnsAddr = mdns_addr;
1240         const sockaddr* mdnsap = reinterpret_cast<const sockaddr*>(&ss);
1241         unique_fd fd;
1242 
1243         if (setupUdpSocket(statp, mdnsap, &fd, terrno) <= 0) return 0;
1244 
1245         if (statp->target_interface_index_for_mdns != 0) {
1246             if (mdnsap->sa_family == AF_INET) {
1247                 struct ip_mreqn mreqn = {};
1248                 mreqn.imr_ifindex = statp->target_interface_index_for_mdns;
1249                 if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, &mreqn, sizeof(mreqn)) < 0) {
1250                     *terrno = errno;
1251                     continue;
1252                 }
1253             } else if (mdnsap->sa_family == AF_INET6) {
1254                 if (setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
1255                                &statp->target_interface_index_for_mdns,
1256                                sizeof(statp->target_interface_index_for_mdns)) < 0) {
1257                     *terrno = errno;
1258                     continue;
1259                 }
1260             }
1261         }
1262 
1263         if (sendto(fd, msg.data(), msg.size(), 0, mdnsap, sockaddrSize(mdnsap)) !=
1264             static_cast<ptrdiff_t>(msg.size())) {
1265             *terrno = errno;
1266             continue;
1267         }
1268         // RFC 6762: Typically, the timeout would also be shortened to two or three seconds.
1269         const struct timespec finish = evAddTime(evNowTime(), {2, 2000000});
1270 
1271         // Wait for reply.
1272         if (retrying_poll(fd, POLLIN, &finish) <= 0) {
1273             *terrno = errno;
1274             if (*terrno == ETIMEDOUT) *rcode = RCODE_TIMEOUT;
1275             LOG(ERROR) << __func__ << ": " << ((*terrno == ETIMEDOUT) ? "timeout" : "poll failed");
1276             continue;
1277         }
1278 
1279         sockaddr_storage from;
1280         socklen_t fromlen = sizeof(from);
1281         int resplen = recvfrom(fd, ans.data(), ans.size(), 0, (sockaddr*)(void*)&from, &fromlen);
1282 
1283         if (resplen <= 0) {
1284             *terrno = errno;
1285             continue;
1286         }
1287 
1288         if (resplen < HFIXEDSZ) {
1289             // Undersized message.
1290             LOG(ERROR) << __func__ << ": undersized: " << resplen;
1291             *terrno = EMSGSIZE;
1292             continue;
1293         }
1294 
1295         HEADER* anhp = (HEADER*)(void*)ans.data();
1296         if (anhp->tc) {
1297             LOG(DEBUG) << __func__ << ": truncated answer";
1298             *terrno = E2BIG;
1299             continue;
1300         }
1301 
1302         *rcode = anhp->rcode;
1303         *terrno = 0;
1304         return resplen;
1305     }
1306 
1307     return 0;
1308 }
1309 
dump_error(const char * str,const struct sockaddr * address)1310 static void dump_error(const char* str, const struct sockaddr* address) {
1311     char hbuf[NI_MAXHOST];
1312     char sbuf[NI_MAXSERV];
1313     constexpr int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
1314     const int err = errno;
1315 
1316     if (!WOULD_LOG(DEBUG)) return;
1317 
1318     if (getnameinfo(address, sockaddrSize(address), hbuf, sizeof(hbuf), sbuf, sizeof(sbuf),
1319                     niflags)) {
1320         strncpy(hbuf, "?", sizeof(hbuf) - 1);
1321         hbuf[sizeof(hbuf) - 1] = '\0';
1322         strncpy(sbuf, "?", sizeof(sbuf) - 1);
1323         sbuf[sizeof(sbuf) - 1] = '\0';
1324     }
1325     errno = err;
1326     PLOG(DEBUG) << __func__ << ": " << str << " ([" << hbuf << "]." << sbuf << "): ";
1327 }
1328 
// Compares two socket addresses. Returns non-zero when both have the same address family
// and, for AF_INET / AF_INET6, the same port and address (plus the same scope id when
// HAVE_SIN6_SCOPE_ID is defined). Addresses of any other family never compare equal.
static int sock_eq(struct sockaddr* a, struct sockaddr* b) {
    if (a->sa_family != b->sa_family) return 0;

    if (a->sa_family == AF_INET) {
        const struct sockaddr_in* lhs = reinterpret_cast<struct sockaddr_in*>(a);
        const struct sockaddr_in* rhs = reinterpret_cast<struct sockaddr_in*>(b);
        const bool samePort = (lhs->sin_port == rhs->sin_port);
        const bool sameAddr = (lhs->sin_addr.s_addr == rhs->sin_addr.s_addr);
        return samePort && sameAddr;
    }

    if (a->sa_family == AF_INET6) {
        const struct sockaddr_in6* lhs = reinterpret_cast<struct sockaddr_in6*>(a);
        const struct sockaddr_in6* rhs = reinterpret_cast<struct sockaddr_in6*>(b);
        if (lhs->sin6_port != rhs->sin6_port) return 0;
#ifdef HAVE_SIN6_SCOPE_ID
        if (lhs->sin6_scope_id != rhs->sin6_scope_id) return 0;
#endif
        return IN6_ARE_ADDR_EQUAL(&lhs->sin6_addr, &rhs->sin6_addr) ? 1 : 0;
    }

    // Unknown address family.
    return 0;
}
1351 
res_private_dns_send(ResState * statp,const Slice query,const Slice answer,int * rcode,bool * fallback)1352 static int res_private_dns_send(ResState* statp, const Slice query, const Slice answer, int* rcode,
1353                                 bool* fallback) {
1354     const unsigned netId = statp->netid;
1355 
1356     auto& privateDnsConfiguration = PrivateDnsConfiguration::getInstance();
1357     PrivateDnsStatus privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1358     statp->event->set_private_dns_modes(convertEnumType(privateDnsStatus.mode));
1359 
1360     ssize_t result = -1;
1361     switch (privateDnsStatus.mode) {
1362         case PrivateDnsMode::OFF: {
1363             *fallback = true;
1364             return -1;
1365         }
1366         case PrivateDnsMode::OPPORTUNISTIC: {
1367             *fallback = true;
1368             if (privateDnsStatus.hasValidatedDohServers()) {
1369                 result = res_doh_send(statp, query, answer, rcode);
1370                 if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1371             }
1372             return res_tls_send(privateDnsStatus.validatedServers(), statp, query, answer, rcode,
1373                                 privateDnsStatus.mode);
1374         }
1375         case PrivateDnsMode::STRICT: {
1376             *fallback = false;
1377             if (privateDnsStatus.hasValidatedDohServers()) {
1378                 result = res_doh_send(statp, query, answer, rcode);
1379                 if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1380             }
1381             if (privateDnsStatus.validatedServers().empty()) {
1382                 // Sleep and iterate some small number of times checking for the
1383                 // arrival of resolved and validated server IP addresses, instead
1384                 // of returning an immediate error.
1385                 // This is needed because as soon as a network becomes the default network, apps
1386                 // will send DNS queries on that network. If no servers have yet validated, and we
1387                 // do not block those queries, they would immediately fail, causing
1388                 // application-visible errors. Note that this can happen even before the network
1389                 // validates, since an unvalidated network can become the default network if no
1390                 // validated networks are available.
1391                 //
1392                 // TODO: see if there is a better way to address this problem, such as buffering the
1393                 // queries in a queue or only blocking queries for the first few seconds after a
1394                 // default network change.
1395                 for (int i = 0; i < 42; i++) {
1396                     std::this_thread::sleep_for(std::chrono::milliseconds(100));
1397 
1398                     // Calling getStatus() to merely check if there's any validated server seems
1399                     // wasteful. Consider adding a new method in PrivateDnsConfiguration for speed
1400                     // ups.
1401                     privateDnsStatus = privateDnsConfiguration.getStatus(netId);
1402 
1403                     if (privateDnsStatus.hasValidatedDohServers()) {
1404                         result = res_doh_send(statp, query, answer, rcode);
1405                         if (result != DOH_RESULT_CAN_NOT_SEND) return result;
1406                     }
1407 
1408                     // Switch to use the DoT servers if they are validated.
1409                     if (!privateDnsStatus.validatedServers().empty()) {
1410                         break;
1411                     }
1412                 }
1413             }
1414             return res_tls_send(privateDnsStatus.validatedServers(), statp, query, answer, rcode,
1415                                 privateDnsStatus.mode);
1416         }
1417     }
1418     LOG(ERROR) << __func__ << ": unknown private DNS mode";
1419     return -1;
1420 }
1421 
res_doh_send(ResState * statp,const Slice query,const Slice answer,int * rcode)1422 ssize_t res_doh_send(ResState* statp, const Slice query, const Slice answer, int* rcode) {
1423     auto& privateDnsConfiguration = PrivateDnsConfiguration::getInstance();
1424     const unsigned netId = statp->netid;
1425     LOG(DEBUG) << __func__ << ": performing query over Https";
1426     Stopwatch queryStopwatch;
1427     int queryTimeout = Experiments::getInstance()->getFlag(
1428             "doh_query_timeout_ms", PrivateDnsConfiguration::kDohQueryDefaultTimeoutMs);
1429     if (queryTimeout < 1000) {
1430         queryTimeout = 1000;
1431     }
1432     ssize_t result = privateDnsConfiguration.dohQuery(netId, query, answer, queryTimeout);
1433     LOG(INFO) << __func__ << ": Https query result: " << result << ", netid=" << netId;
1434 
1435     if (result == DOH_RESULT_CAN_NOT_SEND) return DOH_RESULT_CAN_NOT_SEND;
1436 
1437     DnsQueryEvent* dnsQueryEvent = statp->event->mutable_dns_query_events()->add_dns_query_event();
1438     dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
1439     // TODO: Make this information available.
1440     // dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(?));
1441     if (result > 0) {
1442         *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1443     } else {
1444         *rcode = -result;
1445     }
1446     dnsQueryEvent->set_rcode(static_cast<NsRcode>(*rcode));
1447     dnsQueryEvent->set_protocol(PROTO_DOH);
1448     span<const uint8_t> msg(query.base(), query.size());
1449     dnsQueryEvent->set_type(getQueryType(msg));
1450 
1451     auto dohServerAddr = privateDnsConfiguration.getDohServer(netId);
1452     if (dohServerAddr.ok()) {
1453         resolv_stats_add(netId, dohServerAddr.value(), dnsQueryEvent);
1454     }
1455 
1456     return result;
1457 }
1458 
res_tls_send(const std::list<DnsTlsServer> & tlsServers,ResState * statp,const Slice query,const Slice answer,int * rcode,PrivateDnsMode mode)1459 int res_tls_send(const std::list<DnsTlsServer>& tlsServers, ResState* statp, const Slice query,
1460                  const Slice answer, int* rcode, PrivateDnsMode mode) {
1461     if (tlsServers.empty()) return -1;
1462     LOG(DEBUG) << __func__ << ": performing query over TLS";
1463     const bool dotQuickFallback =
1464             (mode == PrivateDnsMode::STRICT)
1465                     ? 0
1466                     : Experiments::getInstance()->getFlag("dot_quick_fallback", 1);
1467     int resplen = 0;
1468     const auto response = DnsTlsDispatcher::getInstance().query(tlsServers, statp, query, answer,
1469                                                                 &resplen, dotQuickFallback);
1470 
1471     LOG(INFO) << __func__ << ": TLS query result: " << static_cast<int>(response);
1472     if (mode == PrivateDnsMode::OPPORTUNISTIC) {
1473         // In opportunistic mode, handle falling back to cleartext in some
1474         // cases (DNS shouldn't fail if a validated opportunistic mode server
1475         // becomes unreachable for some reason).
1476         switch (response) {
1477             case DnsTlsTransport::Response::success:
1478                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1479                 return resplen;
1480             // It's OPPORTUNISTIC mode,
1481             // hence it's not required to do anything because it'll fallback to UDP.
1482             case DnsTlsTransport::Response::network_error:
1483             case DnsTlsTransport::Response::internal_error:
1484             default:
1485                 return -1;
1486         }
1487     } else {
1488         // Strict mode
1489         switch (response) {
1490             case DnsTlsTransport::Response::success:
1491                 *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
1492                 return resplen;
1493             case DnsTlsTransport::Response::network_error:
1494                 // This case happens when the query stored in DnsTlsTransport is expired since
1495                 // either 1) the query has been tried for 3 times but no response or 2) fail to
1496                 // establish the connection with the server.
1497                 *rcode = RCODE_TIMEOUT;
1498                 [[fallthrough]];
1499             default:
1500                 return -1;
1501         }
1502     }
1503 }
1504 
resolv_res_nsend(const android_net_context * netContext,std::optional<int> app_socket,span<const uint8_t> msg,span<uint8_t> ans,int * rcode,uint32_t flags,NetworkDnsEventReported * event)1505 int resolv_res_nsend(const android_net_context* netContext, std::optional<int> app_socket,
1506                      span<const uint8_t> msg, span<uint8_t> ans, int* rcode, uint32_t flags,
1507                      NetworkDnsEventReported* event) {
1508     assert(event != nullptr);
1509     ResState res(netContext, app_socket, event);
1510     resolv_populate_res_for_net(&res);
1511     *rcode = NOERROR;
1512     return res_nsend(&res, msg, ans, rcode, flags);
1513 }
1514 
1515 // Returns the elapsed time in milliseconds since the given time `from`.
elapsedTimeInMs(const timespec & from)1516 int elapsedTimeInMs(const timespec& from) {
1517     const timespec now = evNowTime();
1518     return res_stats_calculate_rtt(&now, &from);
1519 }
1520