1 /* MIT License
2 *
3 * Copyright (c) 2024 Brad House
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * SPDX-License-Identifier: MIT
25 */
26
27
28 /* IMPLEMENTATION NOTES
29 * ====================
30 *
31 * With very little effort we should be able to determine fairly proper timeouts
32 * we can use based on prior query history. We track in order to be able to
33 * auto-scale when network conditions change (e.g. maybe there is a provider
34 * failover and timings change due to that). Apple appears to do this within
35 * their system resolver in MacOS. Obviously we should have a minimum, maximum,
36 * and initial value to make sure the algorithm doesn't somehow go off the
37 * rails.
38 *
39 * Values:
40 * - Minimum Timeout: 250ms (approximate RTT half-way around the globe)
41 * - Maximum Timeout: 5000ms (Recommended timeout in RFC 1123), can be reduced
42 * by ARES_OPT_MAXTIMEOUTMS, but otherwise the bound specified by the option
43 * caps the retry timeout.
44 * - Initial Timeout: User-specified via configuration or ARES_OPT_TIMEOUTMS
45 * - Average latency multiplier: 5x (a local DNS server returning a cached value
46 * will be quicker than if it needs to recurse so we need to account for this)
47 * - Minimum Count for Average: 3. This is the minimum number of queries we
48 * need to form an average for the bucket.
49 *
50 * Per-server buckets for tracking latency over time (these are ephemeral
51 * meaning they don't persist once a channel is destroyed). We record both the
52 * current timespan for the bucket and the immediate preceding timespan in case
53 * of roll-overs we can still maintain recent metrics for calculations:
54 * - 1 minute
55 * - 15 minutes
56 * - 1 hr
57 * - 1 day
58 * - since inception
59 *
60 * Each bucket would contain:
61 * - timestamp (divided by interval)
62 * - minimum latency
63 * - maximum latency
64 * - total time
65 * - count
66 * NOTE: average latency is (total time / count), we will calculate this
67 * dynamically when needed
68 *
69 * Basic algorithm for calculating timeout to use would be:
70 * - Scan from most recent bucket to least recent
71 * - Check timestamp of bucket, if doesn't match current time, continue to next
72 * bucket
73 * - Check count of bucket, if its not at least the "Minimum Count for Average",
74 * check the previous bucket, otherwise continue to next bucket
75 * - If we reached the end with no bucket match, use "Initial Timeout"
76 * - If bucket is selected, take ("total time" / count) as Average latency,
77 * multiply by "Average Latency Multiplier", bound by "Minimum Timeout" and
78 * "Maximum Timeout"
79 * NOTE: The timeout calculated may not be the timeout used. If we are retrying
80 * the query on the same server another time, then it will use a larger value
81 *
82 * On each query reply where the response is legitimate (proper response or
83 * NXDOMAIN) and not something like a server error:
84 * - Cycle through each bucket in order
85 * - Check timestamp of bucket against current timestamp, if out of date
86 * overwrite previous entry with values, clear current values
87 * - Compare current minimum and maximum recorded latency against query time and
88 * adjust if necessary
89 * - Increment "count" by 1 and "total time" by the query time
90 *
91 * Other Notes:
92 * - This is always-on, the only user-configurable value is the initial
93 * timeout which will simply re-uses the current option.
94 * - Minimum and Maximum latencies for a bucket are currently unused but are
95 * there in case we find a need for them in the future.
96 */
97
98 #include "ares_private.h"
99
100 /*! Minimum timeout value. Chosen due to it being approximately RTT half-way
101 * around the world */
102 #define MIN_TIMEOUT_MS 250
103
104 /*! Multiplier to apply to average latency to come up with an initial timeout */
105 #define AVG_TIMEOUT_MULTIPLIER 5
106
107 /*! Upper timeout bounds, only used if channel->maxtimeout not set */
108 #define MAX_TIMEOUT_MS 5000
109
110 /*! Minimum queries required to form an average */
111 #define MIN_COUNT_FOR_AVERAGE 3
112
ares_metric_timestamp(ares_server_bucket_t bucket,const ares_timeval_t * now,ares_bool_t is_previous)113 static time_t ares_metric_timestamp(ares_server_bucket_t bucket,
114 const ares_timeval_t *now,
115 ares_bool_t is_previous)
116 {
117 time_t divisor = 1; /* Silence bogus MSVC warning by setting default value */
118
119 switch (bucket) {
120 case ARES_METRIC_1MINUTE:
121 divisor = 60;
122 break;
123 case ARES_METRIC_15MINUTES:
124 divisor = 15 * 60;
125 break;
126 case ARES_METRIC_1HOUR:
127 divisor = 60 * 60;
128 break;
129 case ARES_METRIC_1DAY:
130 divisor = 24 * 60 * 60;
131 break;
132 case ARES_METRIC_INCEPTION:
133 return is_previous ? 0 : 1;
134 case ARES_METRIC_COUNT:
135 return 0; /* Invalid! */
136 }
137
138 if (is_previous) {
139 if (divisor >= now->sec) {
140 return 0;
141 }
142 return (time_t)((now->sec - divisor) / divisor);
143 }
144
145 return (time_t)(now->sec / divisor);
146 }
147
ares_metrics_record(const ares_query_t * query,ares_server_t * server,ares_status_t status,const ares_dns_record_t * dnsrec)148 void ares_metrics_record(const ares_query_t *query, ares_server_t *server,
149 ares_status_t status, const ares_dns_record_t *dnsrec)
150 {
151 ares_timeval_t now;
152 ares_timeval_t tvdiff;
153 unsigned int query_ms;
154 ares_dns_rcode_t rcode;
155 ares_server_bucket_t i;
156
157 if (status != ARES_SUCCESS) {
158 return;
159 }
160
161 if (server == NULL) {
162 return;
163 }
164
165 ares_tvnow(&now);
166
167 rcode = ares_dns_record_get_rcode(dnsrec);
168 if (rcode != ARES_RCODE_NOERROR && rcode != ARES_RCODE_NXDOMAIN) {
169 return;
170 }
171
172 ares_timeval_diff(&tvdiff, &query->ts, &now);
173 query_ms = (unsigned int)((tvdiff.sec * 1000) + (tvdiff.usec / 1000));
174 if (query_ms == 0) {
175 query_ms = 1;
176 }
177
178 /* Place in each bucket */
179 for (i = 0; i < ARES_METRIC_COUNT; i++) {
180 time_t ts = ares_metric_timestamp(i, &now, ARES_FALSE);
181
182 /* Copy metrics to prev and clear */
183 if (ts != server->metrics[i].ts) {
184 server->metrics[i].prev_ts = server->metrics[i].ts;
185 server->metrics[i].prev_total_ms = server->metrics[i].total_ms;
186 server->metrics[i].prev_total_count = server->metrics[i].total_count;
187 server->metrics[i].ts = ts;
188 server->metrics[i].latency_min_ms = 0;
189 server->metrics[i].latency_max_ms = 0;
190 server->metrics[i].total_ms = 0;
191 server->metrics[i].total_count = 0;
192 }
193
194 if (server->metrics[i].latency_min_ms == 0 ||
195 server->metrics[i].latency_min_ms > query_ms) {
196 server->metrics[i].latency_min_ms = query_ms;
197 }
198
199 if (query_ms > server->metrics[i].latency_max_ms) {
200 server->metrics[i].latency_max_ms = query_ms;
201 }
202
203 server->metrics[i].total_count++;
204 server->metrics[i].total_ms += (ares_uint64_t)query_ms;
205 }
206 }
207
ares_metrics_server_timeout(const ares_server_t * server,const ares_timeval_t * now)208 size_t ares_metrics_server_timeout(const ares_server_t *server,
209 const ares_timeval_t *now)
210 {
211 const ares_channel_t *channel = server->channel;
212 ares_server_bucket_t i;
213 size_t timeout_ms = 0;
214 size_t max_timeout_ms;
215
216 for (i = 0; i < ARES_METRIC_COUNT; i++) {
217 time_t ts = ares_metric_timestamp(i, now, ARES_FALSE);
218
219 /* This ts has been invalidated, see if we should use the previous
220 * time period */
221 if (ts != server->metrics[i].ts ||
222 server->metrics[i].total_count < MIN_COUNT_FOR_AVERAGE) {
223 time_t prev_ts = ares_metric_timestamp(i, now, ARES_TRUE);
224 if (prev_ts != server->metrics[i].prev_ts ||
225 server->metrics[i].prev_total_count < MIN_COUNT_FOR_AVERAGE) {
226 /* Move onto next bucket */
227 continue;
228 }
229 /* Calculate average time for previous bucket */
230 timeout_ms = (size_t)(server->metrics[i].prev_total_ms /
231 server->metrics[i].prev_total_count);
232 } else {
233 /* Calculate average time for current bucket*/
234 timeout_ms =
235 (size_t)(server->metrics[i].total_ms / server->metrics[i].total_count);
236 }
237
238 /* Multiply average by constant to get timeout value */
239 timeout_ms *= AVG_TIMEOUT_MULTIPLIER;
240 break;
241 }
242
243 /* If we're here, that means its the first query for the server, so we just
244 * use the initial default timeout */
245 if (timeout_ms == 0) {
246 timeout_ms = channel->timeout;
247 }
248
249 /* don't go below lower bounds */
250 if (timeout_ms < MIN_TIMEOUT_MS) {
251 timeout_ms = MIN_TIMEOUT_MS;
252 }
253
254 /* don't go above upper bounds */
255 max_timeout_ms = channel->maxtimeout ? channel->maxtimeout : MAX_TIMEOUT_MS;
256 if (timeout_ms > max_timeout_ms) {
257 timeout_ms = max_timeout_ms;
258 }
259
260 return timeout_ms;
261 }
262