1 /*
2 *
3 * Copyright 2018 gRPC authors.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19 #include <grpc/support/port_platform.h>
20
21 #include "src/core/lib/iomgr/buffer_list.h"
22 #include "src/core/lib/iomgr/port.h"
23
24 #include <grpc/support/log.h>
25
26 #ifdef GRPC_LINUX_ERRQUEUE
27 #include <netinet/in.h>
28 #include <string.h>
29 #include <time.h>
30
31 #include "src/core/lib/gprpp/memory.h"
32
33 namespace grpc_core {
34 namespace {
35 /** Fills gpr_timespec gts based on values from timespec ts */
fill_gpr_from_timestamp(gpr_timespec * gts,const struct timespec * ts)36 void fill_gpr_from_timestamp(gpr_timespec* gts, const struct timespec* ts) {
37 gts->tv_sec = ts->tv_sec;
38 gts->tv_nsec = static_cast<int32_t>(ts->tv_nsec);
39 gts->clock_type = GPR_CLOCK_REALTIME;
40 }
41
default_timestamps_callback(void *,grpc_core::Timestamps *,grpc_error *)42 void default_timestamps_callback(void* /*arg*/, grpc_core::Timestamps* /*ts*/,
43 grpc_error* /*shudown_err*/) {
44 gpr_log(GPR_DEBUG, "Timestamps callback has not been registered");
45 }
46
47 /** The saved callback function that will be invoked when we get all the
48 * timestamps that we are going to get for a TracedBuffer. */
49 void (*timestamps_callback)(void*, grpc_core::Timestamps*,
50 grpc_error* shutdown_err) =
51 default_timestamps_callback;
52
/* Reads a value of type T from \a ptr by copying its bytes with memcpy, so
 * that \a ptr does not have to be suitably aligned for T. Used to pull the
 * individual opt stats out of a cmsg payload. */
template <typename T>
T read_unaligned(const void* ptr) {
  T result;
  memcpy(&result, ptr, sizeof(T));
  return result;
}
61
62 /* Extracts opt stats from the tcp_info struct \a info to \a metrics */
extract_opt_stats_from_tcp_info(ConnectionMetrics * metrics,const grpc_core::tcp_info * info)63 void extract_opt_stats_from_tcp_info(ConnectionMetrics* metrics,
64 const grpc_core::tcp_info* info) {
65 if (info == nullptr) {
66 return;
67 }
68 if (info->length > offsetof(grpc_core::tcp_info, tcpi_sndbuf_limited)) {
69 metrics->recurring_retrans.emplace(info->tcpi_retransmits);
70 metrics->is_delivery_rate_app_limited.emplace(
71 info->tcpi_delivery_rate_app_limited);
72 metrics->congestion_window.emplace(info->tcpi_snd_cwnd);
73 metrics->reordering.emplace(info->tcpi_reordering);
74 metrics->packet_retx.emplace(info->tcpi_total_retrans);
75 metrics->pacing_rate.emplace(info->tcpi_pacing_rate);
76 metrics->data_notsent.emplace(info->tcpi_notsent_bytes);
77 if (info->tcpi_min_rtt != UINT32_MAX) {
78 metrics->min_rtt.emplace(info->tcpi_min_rtt);
79 }
80 metrics->packet_sent.emplace(info->tcpi_data_segs_out);
81 metrics->delivery_rate.emplace(info->tcpi_delivery_rate);
82 metrics->busy_usec.emplace(info->tcpi_busy_time);
83 metrics->rwnd_limited_usec.emplace(info->tcpi_rwnd_limited);
84 metrics->sndbuf_limited_usec.emplace(info->tcpi_sndbuf_limited);
85 }
86 if (info->length > offsetof(grpc_core::tcp_info, tcpi_dsack_dups)) {
87 metrics->data_sent.emplace(info->tcpi_bytes_sent);
88 metrics->data_retx.emplace(info->tcpi_bytes_retrans);
89 metrics->packet_spurious_retx.emplace(info->tcpi_dsack_dups);
90 }
91 }
92
93 /** Extracts opt stats from the given control message \a opt_stats to the
94 * connection metrics \a metrics */
extract_opt_stats_from_cmsg(ConnectionMetrics * metrics,const cmsghdr * opt_stats)95 void extract_opt_stats_from_cmsg(ConnectionMetrics* metrics,
96 const cmsghdr* opt_stats) {
97 if (opt_stats == nullptr) {
98 return;
99 }
100 const auto* data = CMSG_DATA(opt_stats);
101 constexpr int64_t cmsg_hdr_len = CMSG_ALIGN(sizeof(struct cmsghdr));
102 const int64_t len = opt_stats->cmsg_len - cmsg_hdr_len;
103 int64_t offset = 0;
104
105 while (offset < len) {
106 const auto* attr = reinterpret_cast<const nlattr*>(data + offset);
107 const void* val = data + offset + NLA_HDRLEN;
108 switch (attr->nla_type) {
109 case TCP_NLA_BUSY: {
110 metrics->busy_usec.emplace(read_unaligned<uint64_t>(val));
111 break;
112 }
113 case TCP_NLA_RWND_LIMITED: {
114 metrics->rwnd_limited_usec.emplace(read_unaligned<uint64_t>(val));
115 break;
116 }
117 case TCP_NLA_SNDBUF_LIMITED: {
118 metrics->sndbuf_limited_usec.emplace(read_unaligned<uint64_t>(val));
119 break;
120 }
121 case TCP_NLA_PACING_RATE: {
122 metrics->pacing_rate.emplace(read_unaligned<uint64_t>(val));
123 break;
124 }
125 case TCP_NLA_DELIVERY_RATE: {
126 metrics->delivery_rate.emplace(read_unaligned<uint64_t>(val));
127 break;
128 }
129 case TCP_NLA_DELIVERY_RATE_APP_LMT: {
130 metrics->is_delivery_rate_app_limited.emplace(
131 read_unaligned<uint8_t>(val));
132 break;
133 }
134 case TCP_NLA_SND_CWND: {
135 metrics->congestion_window.emplace(read_unaligned<uint32_t>(val));
136 break;
137 }
138 case TCP_NLA_MIN_RTT: {
139 metrics->min_rtt.emplace(read_unaligned<uint32_t>(val));
140 break;
141 }
142 case TCP_NLA_SRTT: {
143 metrics->srtt.emplace(read_unaligned<uint32_t>(val));
144 break;
145 }
146 case TCP_NLA_RECUR_RETRANS: {
147 metrics->recurring_retrans.emplace(read_unaligned<uint8_t>(val));
148 break;
149 }
150 case TCP_NLA_BYTES_SENT: {
151 metrics->data_sent.emplace(read_unaligned<uint64_t>(val));
152 break;
153 }
154 case TCP_NLA_DATA_SEGS_OUT: {
155 metrics->packet_sent.emplace(read_unaligned<uint64_t>(val));
156 break;
157 }
158 case TCP_NLA_TOTAL_RETRANS: {
159 metrics->packet_retx.emplace(read_unaligned<uint64_t>(val));
160 break;
161 }
162 case TCP_NLA_DELIVERED: {
163 metrics->packet_delivered.emplace(read_unaligned<uint32_t>(val));
164 break;
165 }
166 case TCP_NLA_DELIVERED_CE: {
167 metrics->packet_delivered_ce.emplace(read_unaligned<uint32_t>(val));
168 break;
169 }
170 case TCP_NLA_BYTES_RETRANS: {
171 metrics->data_retx.emplace(read_unaligned<uint64_t>(val));
172 break;
173 }
174 case TCP_NLA_DSACK_DUPS: {
175 metrics->packet_spurious_retx.emplace(read_unaligned<uint32_t>(val));
176 break;
177 }
178 case TCP_NLA_REORDERING: {
179 metrics->reordering.emplace(read_unaligned<uint32_t>(val));
180 break;
181 }
182 case TCP_NLA_SND_SSTHRESH: {
183 metrics->snd_ssthresh.emplace(read_unaligned<uint32_t>(val));
184 break;
185 }
186 }
187 offset += NLA_ALIGN(attr->nla_len);
188 }
189 }
190
get_socket_tcp_info(grpc_core::tcp_info * info,int fd)191 static int get_socket_tcp_info(grpc_core::tcp_info* info, int fd) {
192 memset(info, 0, sizeof(*info));
193 info->length = sizeof(*info) - sizeof(socklen_t);
194 return getsockopt(fd, IPPROTO_TCP, TCP_INFO, info, &(info->length));
195 }
196 } /* namespace */
197
AddNewEntry(TracedBuffer ** head,uint32_t seq_no,int fd,void * arg)198 void TracedBuffer::AddNewEntry(TracedBuffer** head, uint32_t seq_no, int fd,
199 void* arg) {
200 GPR_DEBUG_ASSERT(head != nullptr);
201 TracedBuffer* new_elem = new TracedBuffer(seq_no, arg);
202 /* Store the current time as the sendmsg time. */
203 new_elem->ts_.sendmsg_time.time = gpr_now(GPR_CLOCK_REALTIME);
204 new_elem->ts_.scheduled_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
205 new_elem->ts_.sent_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
206 new_elem->ts_.acked_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
207
208 if (get_socket_tcp_info(&new_elem->ts_.info, fd) == 0) {
209 extract_opt_stats_from_tcp_info(&new_elem->ts_.sendmsg_time.metrics,
210 &new_elem->ts_.info);
211 }
212 if (*head == nullptr) {
213 *head = new_elem;
214 return;
215 }
216 /* Append at the end. */
217 TracedBuffer* ptr = *head;
218 while (ptr->next_ != nullptr) {
219 ptr = ptr->next_;
220 }
221 ptr->next_ = new_elem;
222 }
223
ProcessTimestamp(TracedBuffer ** head,struct sock_extended_err * serr,struct cmsghdr * opt_stats,struct scm_timestamping * tss)224 void TracedBuffer::ProcessTimestamp(TracedBuffer** head,
225 struct sock_extended_err* serr,
226 struct cmsghdr* opt_stats,
227 struct scm_timestamping* tss) {
228 GPR_DEBUG_ASSERT(head != nullptr);
229 TracedBuffer* elem = *head;
230 TracedBuffer* next = nullptr;
231 while (elem != nullptr) {
232 /* The byte number refers to the sequence number of the last byte which this
233 * timestamp relates to. */
234 if (serr->ee_data >= elem->seq_no_) {
235 switch (serr->ee_info) {
236 case SCM_TSTAMP_SCHED:
237 fill_gpr_from_timestamp(&(elem->ts_.scheduled_time.time),
238 &(tss->ts[0]));
239 extract_opt_stats_from_cmsg(&(elem->ts_.scheduled_time.metrics),
240 opt_stats);
241 elem = elem->next_;
242 break;
243 case SCM_TSTAMP_SND:
244 fill_gpr_from_timestamp(&(elem->ts_.sent_time.time), &(tss->ts[0]));
245 extract_opt_stats_from_cmsg(&(elem->ts_.sent_time.metrics),
246 opt_stats);
247 elem = elem->next_;
248 break;
249 case SCM_TSTAMP_ACK:
250 fill_gpr_from_timestamp(&(elem->ts_.acked_time.time), &(tss->ts[0]));
251 extract_opt_stats_from_cmsg(&(elem->ts_.acked_time.metrics),
252 opt_stats);
253 /* Got all timestamps. Do the callback and free this TracedBuffer.
254 * The thing below can be passed by value if we don't want the
255 * restriction on the lifetime. */
256 timestamps_callback(elem->arg_, &(elem->ts_), GRPC_ERROR_NONE);
257 next = elem->next_;
258 delete static_cast<TracedBuffer*>(elem);
259 *head = elem = next;
260 break;
261 default:
262 abort();
263 }
264 } else {
265 break;
266 }
267 }
268 }
269
Shutdown(TracedBuffer ** head,void * remaining,grpc_error * shutdown_err)270 void TracedBuffer::Shutdown(TracedBuffer** head, void* remaining,
271 grpc_error* shutdown_err) {
272 GPR_DEBUG_ASSERT(head != nullptr);
273 TracedBuffer* elem = *head;
274 while (elem != nullptr) {
275 timestamps_callback(elem->arg_, &(elem->ts_), shutdown_err);
276 auto* next = elem->next_;
277 delete elem;
278 elem = next;
279 }
280 *head = nullptr;
281 if (remaining != nullptr) {
282 timestamps_callback(remaining, nullptr, shutdown_err);
283 }
284 GRPC_ERROR_UNREF(shutdown_err);
285 }
286
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,grpc_core::Timestamps *,grpc_error * error))287 void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*,
288 grpc_core::Timestamps*,
289 grpc_error* error)) {
290 timestamps_callback = fn;
291 }
292 } /* namespace grpc_core */
293
294 #else /* GRPC_LINUX_ERRQUEUE */
295
296 namespace grpc_core {
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,grpc_core::Timestamps *,grpc_error * error))297 void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*,
298 grpc_core::Timestamps*,
299 grpc_error* error)) {
300 // Cast value of fn to void to avoid unused parameter warning.
301 // Can't comment out the name because some compilers and formatters don't
302 // like the sequence */* , which would arise from */*fn*/.
303 (void)fn;
304 gpr_log(GPR_DEBUG, "Timestamps callback is not enabled for this platform");
305 }
306 } /* namespace grpc_core */
307
308 #endif /* GRPC_LINUX_ERRQUEUE */
309