• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/dns/dns_session.h"
6 
7 #include "base/basictypes.h"
8 #include "base/bind.h"
9 #include "base/lazy_instance.h"
10 #include "base/metrics/histogram.h"
11 #include "base/metrics/sample_vector.h"
12 #include "base/rand_util.h"
13 #include "base/stl_util.h"
14 #include "base/time/time.h"
15 #include "net/base/ip_endpoint.h"
16 #include "net/base/net_errors.h"
17 #include "net/dns/dns_config_service.h"
18 #include "net/dns/dns_socket_pool.h"
19 #include "net/socket/stream_socket.h"
20 #include "net/udp/datagram_client_socket.h"
21 
22 namespace net {
23 
24 namespace {
25 // Never exceed max timeout.
26 const unsigned kMaxTimeoutMs = 5000;
27 // Set min timeout, in case we are talking to a local DNS proxy.
28 const unsigned kMinTimeoutMs = 10;
29 
30 // Number of buckets in the histogram of observed RTTs.
31 const size_t kRTTBucketCount = 100;
32 // Target percentile in the RTT histogram used for retransmission timeout.
33 const unsigned kRTOPercentile = 99;
34 }  // namespace
35 
36 // Runtime statistics of DNS server.
37 struct DnsSession::ServerStats {
ServerStatsnet::DnsSession::ServerStats38   ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets)
39     : last_failure_count(0), rtt_estimate(rtt_estimate_param) {
40     rtt_histogram.reset(new base::SampleVector(buckets));
41     // Seed histogram with 2 samples at |rtt_estimate| timeout.
42     rtt_histogram->Accumulate(rtt_estimate.InMilliseconds(), 2);
43   }
44 
45   // Count of consecutive failures after last success.
46   int last_failure_count;
47 
48   // Last time when server returned failure or timeout.
49   base::Time last_failure;
50   // Last time when server returned success.
51   base::Time last_success;
52 
53   // Estimated RTT using moving average.
54   base::TimeDelta rtt_estimate;
55   // Estimated error in the above.
56   base::TimeDelta rtt_deviation;
57 
58   // A histogram of observed RTT .
59   scoped_ptr<base::SampleVector> rtt_histogram;
60 
61   DISALLOW_COPY_AND_ASSIGN(ServerStats);
62 };
63 
64 // static
65 base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ =
66     LAZY_INSTANCE_INITIALIZER;
67 
RttBuckets()68 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) {
69   base::Histogram::InitializeBucketRanges(1, 5000, this);
70 }
71 
SocketLease(scoped_refptr<DnsSession> session,unsigned server_index,scoped_ptr<DatagramClientSocket> socket)72 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session,
73                                      unsigned server_index,
74                                      scoped_ptr<DatagramClientSocket> socket)
75     : session_(session), server_index_(server_index), socket_(socket.Pass()) {}
76 
~SocketLease()77 DnsSession::SocketLease::~SocketLease() {
78   session_->FreeSocket(server_index_, socket_.Pass());
79 }
80 
DnsSession(const DnsConfig & config,scoped_ptr<DnsSocketPool> socket_pool,const RandIntCallback & rand_int_callback,NetLog * net_log)81 DnsSession::DnsSession(const DnsConfig& config,
82                        scoped_ptr<DnsSocketPool> socket_pool,
83                        const RandIntCallback& rand_int_callback,
84                        NetLog* net_log)
85     : config_(config),
86       socket_pool_(socket_pool.Pass()),
87       rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)),
88       net_log_(net_log),
89       server_index_(0) {
90   socket_pool_->Initialize(&config_.nameservers, net_log);
91   UMA_HISTOGRAM_CUSTOM_COUNTS(
92       "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 11);
93   for (size_t i = 0; i < config_.nameservers.size(); ++i) {
94     server_stats_.push_back(new ServerStats(config_.timeout,
95                                             rtt_buckets_.Pointer()));
96   }
97 }
98 
~DnsSession()99 DnsSession::~DnsSession() {
100   RecordServerStats();
101 }
102 
NextQueryId() const103 int DnsSession::NextQueryId() const { return rand_callback_.Run(); }
104 
NextFirstServerIndex()105 unsigned DnsSession::NextFirstServerIndex() {
106   unsigned index = NextGoodServerIndex(server_index_);
107   if (config_.rotate)
108     server_index_ = (server_index_ + 1) % config_.nameservers.size();
109   return index;
110 }
111 
NextGoodServerIndex(unsigned server_index)112 unsigned DnsSession::NextGoodServerIndex(unsigned server_index) {
113   unsigned index = server_index;
114   base::Time oldest_server_failure(base::Time::Now());
115   unsigned oldest_server_failure_index = 0;
116 
117   UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
118                         server_stats_[server_index]->last_failure.is_null());
119 
120   do {
121     base::Time cur_server_failure = server_stats_[index]->last_failure;
122     // If number of failures on this server doesn't exceed number of allowed
123     // attempts, return its index.
124     if (server_stats_[server_index]->last_failure_count < config_.attempts) {
125       return index;
126     }
127     // Track oldest failed server.
128     if (cur_server_failure < oldest_server_failure) {
129       oldest_server_failure = cur_server_failure;
130       oldest_server_failure_index = index;
131     }
132     index = (index + 1) % config_.nameservers.size();
133   } while (index != server_index);
134 
135   // If we are here it means that there are no successful servers, so we have
136   // to use one that has failed oldest.
137   return oldest_server_failure_index;
138 }
139 
RecordServerFailure(unsigned server_index)140 void DnsSession::RecordServerFailure(unsigned server_index) {
141   UMA_HISTOGRAM_CUSTOM_COUNTS(
142       "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 11);
143   ++(server_stats_[server_index]->last_failure_count);
144   server_stats_[server_index]->last_failure = base::Time::Now();
145 }
146 
RecordServerSuccess(unsigned server_index)147 void DnsSession::RecordServerSuccess(unsigned server_index) {
148   if (server_stats_[server_index]->last_success.is_null()) {
149     UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
150                            server_stats_[server_index]->last_failure_count);
151   } else {
152     UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
153                            server_stats_[server_index]->last_failure_count);
154   }
155   server_stats_[server_index]->last_failure_count = 0;
156   server_stats_[server_index]->last_failure = base::Time();
157   server_stats_[server_index]->last_success = base::Time::Now();
158 }
159 
RecordRTT(unsigned server_index,base::TimeDelta rtt)160 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
161   DCHECK_LT(server_index, server_stats_.size());
162 
163   // For measurement, assume it is the first attempt (no backoff).
164   base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0);
165   base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0);
166   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson);
167   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram",
168                       rtt - timeout_histogram);
169   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder",
170                       timeout_jacobson - rtt);
171   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder",
172                       timeout_histogram - rtt);
173 
174   // Jacobson/Karels algorithm for TCP.
175   // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
176   base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate;
177   base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation;
178   base::TimeDelta current_error = rtt - estimate;
179   estimate += current_error / 8;  // * alpha
180   base::TimeDelta abs_error = base::TimeDelta::FromInternalValue(
181       std::abs(current_error.ToInternalValue()));
182   deviation += (abs_error - deviation) / 4;  // * delta
183 
184   // Histogram-based method.
185   server_stats_[server_index]->rtt_histogram
186       ->Accumulate(rtt.InMilliseconds(), 1);
187 }
188 
RecordLostPacket(unsigned server_index,int attempt)189 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
190   base::TimeDelta timeout_jacobson =
191       NextTimeoutFromJacobson(server_index, attempt);
192   base::TimeDelta timeout_histogram =
193       NextTimeoutFromHistogram(server_index, attempt);
194   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson);
195   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram);
196 }
197 
RecordServerStats()198 void DnsSession::RecordServerStats() {
199   for (size_t index = 0; index < server_stats_.size(); ++index) {
200     if (server_stats_[index]->last_failure_count) {
201       if (server_stats_[index]->last_success.is_null()) {
202         UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
203                              server_stats_[index]->last_failure_count);
204       } else {
205         UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
206                              server_stats_[index]->last_failure_count);
207       }
208     }
209   }
210 }
211 
212 
NextTimeout(unsigned server_index,int attempt)213 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) {
214   // Respect config timeout if it exceeds |kMaxTimeoutMs|.
215   if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs)
216     return config_.timeout;
217   return NextTimeoutFromHistogram(server_index, attempt);
218 }
219 
220 // Allocate a socket, already connected to the server address.
AllocateSocket(unsigned server_index,const NetLog::Source & source)221 scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket(
222     unsigned server_index, const NetLog::Source& source) {
223   scoped_ptr<DatagramClientSocket> socket;
224 
225   socket = socket_pool_->AllocateSocket(server_index);
226   if (!socket.get())
227     return scoped_ptr<SocketLease>();
228 
229   socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE,
230                               source.ToEventParametersCallback());
231 
232   SocketLease* lease = new SocketLease(this, server_index, socket.Pass());
233   return scoped_ptr<SocketLease>(lease);
234 }
235 
CreateTCPSocket(unsigned server_index,const NetLog::Source & source)236 scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket(
237     unsigned server_index, const NetLog::Source& source) {
238   return socket_pool_->CreateTCPSocket(server_index, source);
239 }
240 
241 // Release a socket.
FreeSocket(unsigned server_index,scoped_ptr<DatagramClientSocket> socket)242 void DnsSession::FreeSocket(unsigned server_index,
243                             scoped_ptr<DatagramClientSocket> socket) {
244   DCHECK(socket.get());
245 
246   socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE);
247 
248   socket_pool_->FreeSocket(server_index, socket.Pass());
249 }
250 
NextTimeoutFromJacobson(unsigned server_index,int attempt)251 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
252                                                     int attempt) {
253   DCHECK_LT(server_index, server_stats_.size());
254 
255   base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate +
256                             4 * server_stats_[server_index]->rtt_deviation;
257 
258   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
259 
260   // The timeout doubles every full round.
261   unsigned num_backoffs = attempt / config_.nameservers.size();
262 
263   return std::min(timeout * (1 << num_backoffs),
264                   base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
265 }
266 
NextTimeoutFromHistogram(unsigned server_index,int attempt)267 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index,
268                                                      int attempt) {
269   DCHECK_LT(server_index, server_stats_.size());
270 
271   COMPILE_ASSERT(std::numeric_limits<base::HistogramBase::Count>::is_signed,
272                  histogram_base_count_assumed_to_be_signed);
273 
274   // Use fixed percentile of observed samples.
275   const base::SampleVector& samples =
276       *server_stats_[server_index]->rtt_histogram;
277 
278   base::HistogramBase::Count total = samples.TotalCount();
279   base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100;
280   size_t index = 0;
281   while (remaining_count > 0 && index < rtt_buckets_.Get().size()) {
282     remaining_count -= samples.GetCountAtIndex(index);
283     ++index;
284   }
285 
286   base::TimeDelta timeout =
287       base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index));
288 
289   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
290 
291   // The timeout still doubles every full round.
292   unsigned num_backoffs = attempt / config_.nameservers.size();
293 
294   return std::min(timeout * (1 << num_backoffs),
295                   base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
296 }
297 
298 }  // namespace net
299