1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "resolv"
18
19 #include "DnsTlsDispatcher.h"
20
21 #include <netdutils/Stopwatch.h>
22
23 #include "DnsTlsSocketFactory.h"
24 #include "Experiments.h"
25 #include "PrivateDnsConfiguration.h"
26 #include "resolv_cache.h"
27 #include "resolv_private.h"
28 #include "stats.pb.h"
29
30 #include <android-base/logging.h>
31
32 namespace android {
33 namespace net {
34
35 using android::netdutils::IPSockAddr;
36 using android::netdutils::Stopwatch;
37 using netdutils::Slice;
38
39 // static
40 std::mutex DnsTlsDispatcher::sLock;
41
DnsTlsDispatcher()42 DnsTlsDispatcher::DnsTlsDispatcher() {
43 mFactory.reset(new DnsTlsSocketFactory());
44 }
45
getInstance()46 DnsTlsDispatcher& DnsTlsDispatcher::getInstance() {
47 static DnsTlsDispatcher instance;
48 return instance;
49 }
50
getOrderedAndUsableServerList(const std::list<DnsTlsServer> & tlsServers,unsigned netId,unsigned mark)51 std::list<DnsTlsServer> DnsTlsDispatcher::getOrderedAndUsableServerList(
52 const std::list<DnsTlsServer>& tlsServers, unsigned netId, unsigned mark) {
53 // Our preferred DnsTlsServer order is:
54 // 1) reuse existing IPv6 connections
55 // 2) reuse existing IPv4 connections
56 // 3) establish new IPv6 connections
57 // 4) establish new IPv4 connections
58 std::list<DnsTlsServer> existing6;
59 std::list<DnsTlsServer> existing4;
60 std::list<DnsTlsServer> new6;
61 std::list<DnsTlsServer> new4;
62
63 // Pull out any servers for which we might have existing connections and
64 // place them at the from the list of servers to try.
65 {
66 std::lock_guard guard(sLock);
67
68 for (const auto& tlsServer : tlsServers) {
69 const Key key = std::make_pair(mark, tlsServer);
70 if (const Transport* xport = getTransport(key); xport != nullptr) {
71 // DoT revalidation specific feature.
72 if (!xport->usable()) {
73 // Don't use this xport. It will be removed after timeout
74 // (IDLE_TIMEOUT minutes).
75 LOG(DEBUG) << "Skip using DoT server " << tlsServer.toIpString() << " on "
76 << netId;
77 continue;
78 }
79
80 switch (tlsServer.ss.ss_family) {
81 case AF_INET:
82 existing4.push_back(tlsServer);
83 break;
84 case AF_INET6:
85 existing6.push_back(tlsServer);
86 break;
87 }
88 } else {
89 switch (tlsServer.ss.ss_family) {
90 case AF_INET:
91 new4.push_back(tlsServer);
92 break;
93 case AF_INET6:
94 new6.push_back(tlsServer);
95 break;
96 }
97 }
98 }
99 }
100
101 auto& out = existing6;
102 out.splice(out.cend(), existing4);
103 out.splice(out.cend(), new6);
104 out.splice(out.cend(), new4);
105 return out;
106 }
107
query(const std::list<DnsTlsServer> & tlsServers,ResState * statp,const Slice query,const Slice ans,int * resplen,bool dotQuickFallback)108 DnsTlsTransport::Response DnsTlsDispatcher::query(const std::list<DnsTlsServer>& tlsServers,
109 ResState* statp, const Slice query,
110 const Slice ans, int* resplen,
111 bool dotQuickFallback) {
112 const std::list<DnsTlsServer> servers(
113 getOrderedAndUsableServerList(tlsServers, statp->netid, statp->mark));
114
115 if (servers.empty()) LOG(WARNING) << "No usable DnsTlsServers";
116
117 DnsTlsTransport::Response code = DnsTlsTransport::Response::internal_error;
118 int serverCount = 0;
119 for (const auto& server : servers) {
120 DnsQueryEvent* dnsQueryEvent =
121 statp->event->mutable_dns_query_events()->add_dns_query_event();
122
123 bool connectTriggered = false;
124 Stopwatch queryStopwatch;
125 code = this->query(server, statp->netid, statp->mark, query, ans, resplen,
126 &connectTriggered);
127
128 dnsQueryEvent->set_latency_micros(saturate_cast<int32_t>(queryStopwatch.timeTakenUs()));
129 dnsQueryEvent->set_dns_server_index(serverCount++);
130 dnsQueryEvent->set_ip_version(ipFamilyToIPVersion(server.ss.ss_family));
131 dnsQueryEvent->set_protocol(PROTO_DOT);
132 std::span<const uint8_t> msg(query.base(), query.size());
133 dnsQueryEvent->set_type(getQueryType(msg));
134 dnsQueryEvent->set_connected(connectTriggered);
135
136 switch (code) {
137 // These response codes are valid responses and not expected to
138 // change if another server is queried.
139 case DnsTlsTransport::Response::success:
140 dnsQueryEvent->set_rcode(
141 static_cast<NsRcode>(reinterpret_cast<HEADER*>(ans.base())->rcode));
142 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
143 return code;
144 case DnsTlsTransport::Response::limit_error:
145 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
146 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
147 return code;
148 // These response codes might differ when trying other servers, so
149 // keep iterating to see if we can get a different (better) result.
150 case DnsTlsTransport::Response::network_error:
151 // Sync from res_tls_send in res_send.cpp
152 dnsQueryEvent->set_rcode(NS_R_TIMEOUT);
153 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
154 if (dotQuickFallback) {
155 return code;
156 }
157 break;
158 case DnsTlsTransport::Response::internal_error:
159 dnsQueryEvent->set_rcode(NS_R_INTERNAL_ERROR);
160 resolv_stats_add(statp->netid, IPSockAddr::toIPSockAddr(server.ss), dnsQueryEvent);
161 break;
162 // No "default" statement.
163 }
164 }
165
166 return code;
167 }
168
query(const DnsTlsServer & server,unsigned netId,unsigned mark,const Slice query,const Slice ans,int * resplen,bool * connectTriggered)169 DnsTlsTransport::Response DnsTlsDispatcher::query(const DnsTlsServer& server, unsigned netId,
170 unsigned mark, const Slice query, const Slice ans,
171 int* resplen, bool* connectTriggered) {
172 // TODO: This can cause the resolver to create multiple connections to the same DoT server
173 // merely due to different mark, such as the bit explicitlySelected unset.
174 // See if we can save them and just create one connection for one DoT server.
175 const Key key = std::make_pair(mark, server);
176 Transport* xport;
177 {
178 std::lock_guard guard(sLock);
179 if (xport = getTransport(key); xport == nullptr) {
180 xport = addTransport(server, mark, netId);
181 }
182 ++xport->useCount;
183 }
184
185 // Don't call this function and hold sLock at the same time because of the following reason:
186 // TLS handshake requires a lock which is also needed by this function, if the handshake gets
187 // stuck, this function also gets blocked.
188 const int connectCounter = xport->transport.getConnectCounter();
189
190 const auto& result = queryInternal(*xport, query);
191 *connectTriggered = (xport->transport.getConnectCounter() > connectCounter);
192
193 DnsTlsTransport::Response code = result.code;
194 if (code == DnsTlsTransport::Response::success) {
195 if (result.response.size() > ans.size()) {
196 LOG(DEBUG) << "Response too large: " << result.response.size() << " > " << ans.size();
197 code = DnsTlsTransport::Response::limit_error;
198 } else {
199 LOG(DEBUG) << "Got response successfully";
200 *resplen = result.response.size();
201 netdutils::copy(ans, netdutils::makeSlice(result.response));
202 }
203 } else {
204 LOG(DEBUG) << "Query failed: " << (unsigned int)code;
205 }
206
207 auto now = std::chrono::steady_clock::now();
208 {
209 std::lock_guard guard(sLock);
210 --xport->useCount;
211 xport->lastUsed = now;
212
213 // DoT revalidation specific feature.
214 if (xport->checkRevalidationNecessary(code)) {
215 // Even if the revalidation passes, it doesn't guarantee that DoT queries
216 // to the xport can stop failing because revalidation creates a new connection
217 // to probe while the xport still uses an existing connection. So far, there isn't
218 // a feasible way to force the xport to disconnect the connection. If the case
219 // happens, the xport will be marked as unusable and DoT queries won't be sent to
220 // it anymore. Eventually, after IDLE_TIMEOUT, the xport will be destroyed, and
221 // a new xport will be created.
222 const auto result = PrivateDnsConfiguration::getInstance().requestValidation(
223 netId, PrivateDnsConfiguration::ServerIdentity{server}, mark);
224 LOG(WARNING) << "Requested validation for " << server.toIpString() << " with mark 0x"
225 << std::hex << mark << ", "
226 << (result.ok() ? "succeeded" : "failed: " + result.error().message());
227 }
228
229 cleanup(now);
230 }
231 return code;
232 }
233
forceCleanup(unsigned netId)234 void DnsTlsDispatcher::forceCleanup(unsigned netId) {
235 std::lock_guard guard(sLock);
236 forceCleanupLocked(netId);
237 }
238
queryInternal(Transport & xport,const netdutils::Slice query)239 DnsTlsTransport::Result DnsTlsDispatcher::queryInternal(Transport& xport,
240 const netdutils::Slice query) {
241 LOG(DEBUG) << "Sending query of length " << query.size();
242
243 // If dot_async_handshake is not set, the call might block in some cases; otherwise,
244 // the call should return very soon.
245 auto res = xport.transport.query(query);
246 LOG(DEBUG) << "Awaiting response";
247
248 if (xport.timeout().count() == -1) {
249 // Infinite timeout.
250 return res.get();
251 }
252
253 const auto status = res.wait_for(xport.timeout());
254 if (status == std::future_status::timeout) {
255 // TODO(b/186613628): notify the Transport to remove this query.
256 LOG(WARNING) << "DoT query timed out after " << xport.timeout().count() << " ms";
257 return DnsTlsTransport::Result{
258 .code = DnsTlsTransport::Response::network_error,
259 .response = {},
260 };
261 }
262
263 return res.get();
264 }
265
// How long an unused transport may linger before cleanup() erases it, and also
// the minimum spacing between two cleanup scans. This timeout effectively
// controls how long to keep SSL session tickets.
static constexpr std::chrono::minutes IDLE_TIMEOUT{5};
cleanup(std::chrono::time_point<std::chrono::steady_clock> now)268 void DnsTlsDispatcher::cleanup(std::chrono::time_point<std::chrono::steady_clock> now) {
269 // To avoid scanning mStore after every query, return early if a cleanup has been
270 // performed recently.
271 if (now - mLastCleanup < IDLE_TIMEOUT) {
272 return;
273 }
274 for (auto it = mStore.begin(); it != mStore.end();) {
275 auto& s = it->second;
276 if (s->useCount == 0 && now - s->lastUsed > IDLE_TIMEOUT) {
277 it = mStore.erase(it);
278 } else {
279 ++it;
280 }
281 }
282 mLastCleanup = now;
283 }
284
285 // TODO: unify forceCleanupLocked() and cleanup().
forceCleanupLocked(unsigned netId)286 void DnsTlsDispatcher::forceCleanupLocked(unsigned netId) {
287 for (auto it = mStore.begin(); it != mStore.end();) {
288 auto& s = it->second;
289 if (s->useCount == 0 && s->mNetId == netId) {
290 it = mStore.erase(it);
291 } else {
292 ++it;
293 }
294 }
295 }
296
addTransport(const DnsTlsServer & server,unsigned mark,unsigned netId)297 DnsTlsDispatcher::Transport* DnsTlsDispatcher::addTransport(const DnsTlsServer& server,
298 unsigned mark, unsigned netId) {
299 const Key key = std::make_pair(mark, server);
300 Transport* ret = getTransport(key);
301 if (ret != nullptr) return ret;
302
303 const Experiments* const instance = Experiments::getInstance();
304 int triggerThr =
305 instance->getFlag("dot_revalidation_threshold", Transport::kDotRevalidationThreshold);
306 int unusableThr = instance->getFlag("dot_xport_unusable_threshold",
307 Transport::kDotXportUnusableThreshold);
308 int queryTimeout = instance->getFlag("dot_query_timeout_ms", Transport::kDotQueryTimeoutMs);
309
310 // Check and adjust the parameters if they are improperly set.
311 bool revalidationEnabled = false;
312 const bool isForOpportunisticMode = server.name.empty();
313 if (triggerThr > 0 && unusableThr > 0 && isForOpportunisticMode) {
314 revalidationEnabled = true;
315 } else {
316 triggerThr = -1;
317 unusableThr = -1;
318 }
319 if (queryTimeout < 0) {
320 queryTimeout = -1;
321 } else if (queryTimeout < 1000) {
322 queryTimeout = 1000;
323 }
324
325 ret = new Transport(server, mark, netId, mFactory.get(), revalidationEnabled, triggerThr,
326 unusableThr, queryTimeout);
327 LOG(DEBUG) << "Transport is initialized with { " << triggerThr << ", " << unusableThr << ", "
328 << queryTimeout << "ms }"
329 << " for server { " << server.toIpString() << "/" << server.name << " }";
330
331 mStore[key].reset(ret);
332
333 return ret;
334 }
335
getTransport(const Key & key)336 DnsTlsDispatcher::Transport* DnsTlsDispatcher::getTransport(const Key& key) {
337 auto it = mStore.find(key);
338 return (it == mStore.end() ? nullptr : it->second.get());
339 }
340
checkRevalidationNecessary(DnsTlsTransport::Response code)341 bool DnsTlsDispatcher::Transport::checkRevalidationNecessary(DnsTlsTransport::Response code) {
342 if (!revalidationEnabled) return false;
343
344 if (code == DnsTlsTransport::Response::network_error) {
345 continuousfailureCount++;
346 } else {
347 continuousfailureCount = 0;
348 }
349
350 // triggerThreshold must be greater than 0 because the value of revalidationEnabled is true.
351 if (usable() && continuousfailureCount == triggerThreshold) {
352 return true;
353 }
354 return false;
355 }
356
usable() const357 bool DnsTlsDispatcher::Transport::usable() const {
358 if (!revalidationEnabled) return true;
359
360 return continuousfailureCount < unusableThreshold;
361 }
362
363 } // end of namespace net
364 } // end of namespace android
365