• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //
3 // Copyright 2018 gRPC authors.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 //
18 
19 #include "src/core/lib/iomgr/buffer_list.h"
20 
21 #include <grpc/support/port_platform.h>
22 #include <grpc/support/time.h>
23 
24 #include "absl/log/log.h"
25 #include "src/core/lib/iomgr/port.h"
26 #include "src/core/util/crash.h"
27 #include "src/core/util/sync.h"
28 
29 #ifdef GRPC_LINUX_ERRQUEUE
30 #include <netinet/in.h>
31 #include <string.h>
32 #include <time.h>
33 
34 namespace grpc_core {
35 namespace {
36 // Fills gpr_timespec gts based on values from timespec ts.
FillGprFromTimestamp(gpr_timespec * gts,const struct timespec * ts)37 void FillGprFromTimestamp(gpr_timespec* gts, const struct timespec* ts) {
38   gts->tv_sec = ts->tv_sec;
39   gts->tv_nsec = static_cast<int32_t>(ts->tv_nsec);
40   gts->clock_type = GPR_CLOCK_REALTIME;
41 }
42 
DefaultTimestampsCallback(void *,Timestamps *,absl::Status)43 void DefaultTimestampsCallback(void* /*arg*/, Timestamps* /*ts*/,
44                                absl::Status /*shutdown_err*/) {
45   VLOG(2) << "Timestamps callback has not been registered";
46 }
47 
48 // The saved callback function that will be invoked when we get all the
49 // timestamps that we are going to get for a TracedBuffer.
50 void (*g_timestamps_callback)(void*, Timestamps*,
51                               grpc_error_handle shutdown_err) =
52     DefaultTimestampsCallback;
53 
// Reads a value of type T from the bytes at \a ptr by copying them into a
// local object, so the read is safe even when \a ptr is not suitably aligned
// for T. Used to extract individual opt stats from a cmsg payload.
template <typename T>
T ReadUnaligned(const void* ptr) {
  T result;
  memcpy(&result, ptr, sizeof(T));
  return result;
}
62 
63 // Extracts opt stats from the tcp_info struct \a info to \a metrics
ExtractOptStatsFromTcpInfo(ConnectionMetrics * metrics,const tcp_info * info)64 void ExtractOptStatsFromTcpInfo(ConnectionMetrics* metrics,
65                                 const tcp_info* info) {
66   if (info == nullptr) {
67     return;
68   }
69   if (info->length > offsetof(tcp_info, tcpi_sndbuf_limited)) {
70     metrics->recurring_retrans = info->tcpi_retransmits;
71     metrics->is_delivery_rate_app_limited =
72         info->tcpi_delivery_rate_app_limited;
73     metrics->congestion_window = info->tcpi_snd_cwnd;
74     metrics->reordering = info->tcpi_reordering;
75     metrics->packet_retx = info->tcpi_total_retrans;
76     metrics->pacing_rate = info->tcpi_pacing_rate;
77     metrics->data_notsent = info->tcpi_notsent_bytes;
78     if (info->tcpi_min_rtt != UINT32_MAX) {
79       metrics->min_rtt = info->tcpi_min_rtt;
80     }
81     metrics->packet_sent = info->tcpi_data_segs_out;
82     metrics->delivery_rate = info->tcpi_delivery_rate;
83     metrics->busy_usec = info->tcpi_busy_time;
84     metrics->rwnd_limited_usec = info->tcpi_rwnd_limited;
85     metrics->sndbuf_limited_usec = info->tcpi_sndbuf_limited;
86   }
87   if (info->length > offsetof(tcp_info, tcpi_dsack_dups)) {
88     metrics->data_sent = info->tcpi_bytes_sent;
89     metrics->data_retx = info->tcpi_bytes_retrans;
90     metrics->packet_spurious_retx = info->tcpi_dsack_dups;
91   }
92 }
93 
94 // Extracts opt stats from the given control message \a opt_stats to the
95 // connection metrics \a metrics.
ExtractOptStatsFromCmsg(ConnectionMetrics * metrics,const cmsghdr * opt_stats)96 void ExtractOptStatsFromCmsg(ConnectionMetrics* metrics,
97                              const cmsghdr* opt_stats) {
98   if (opt_stats == nullptr) {
99     return;
100   }
101   const auto* data = CMSG_DATA(opt_stats);
102   constexpr int64_t cmsg_hdr_len = CMSG_ALIGN(sizeof(struct cmsghdr));
103   const int64_t len = opt_stats->cmsg_len - cmsg_hdr_len;
104   int64_t offset = 0;
105 
106   while (offset < len) {
107     const auto* attr = reinterpret_cast<const nlattr*>(data + offset);
108     const void* val = data + offset + NLA_HDRLEN;
109     switch (attr->nla_type) {
110       case TCP_NLA_BUSY: {
111         metrics->busy_usec = ReadUnaligned<uint64_t>(val);
112         break;
113       }
114       case TCP_NLA_RWND_LIMITED: {
115         metrics->rwnd_limited_usec = ReadUnaligned<uint64_t>(val);
116         break;
117       }
118       case TCP_NLA_SNDBUF_LIMITED: {
119         metrics->sndbuf_limited_usec = ReadUnaligned<uint64_t>(val);
120         break;
121       }
122       case TCP_NLA_PACING_RATE: {
123         metrics->pacing_rate = ReadUnaligned<uint64_t>(val);
124         break;
125       }
126       case TCP_NLA_DELIVERY_RATE: {
127         metrics->delivery_rate = ReadUnaligned<uint64_t>(val);
128         break;
129       }
130       case TCP_NLA_DELIVERY_RATE_APP_LMT: {
131         metrics->is_delivery_rate_app_limited = ReadUnaligned<uint8_t>(val);
132         break;
133       }
134       case TCP_NLA_SND_CWND: {
135         metrics->congestion_window = ReadUnaligned<uint32_t>(val);
136         break;
137       }
138       case TCP_NLA_MIN_RTT: {
139         metrics->min_rtt = ReadUnaligned<uint32_t>(val);
140         break;
141       }
142       case TCP_NLA_SRTT: {
143         metrics->srtt = ReadUnaligned<uint32_t>(val);
144         break;
145       }
146       case TCP_NLA_RECUR_RETRANS: {
147         metrics->recurring_retrans = ReadUnaligned<uint8_t>(val);
148         break;
149       }
150       case TCP_NLA_BYTES_SENT: {
151         metrics->data_sent = ReadUnaligned<uint64_t>(val);
152         break;
153       }
154       case TCP_NLA_DATA_SEGS_OUT: {
155         metrics->packet_sent = ReadUnaligned<uint64_t>(val);
156         break;
157       }
158       case TCP_NLA_TOTAL_RETRANS: {
159         metrics->packet_retx = ReadUnaligned<uint64_t>(val);
160         break;
161       }
162       case TCP_NLA_DELIVERED: {
163         metrics->packet_delivered = ReadUnaligned<uint32_t>(val);
164         break;
165       }
166       case TCP_NLA_DELIVERED_CE: {
167         metrics->packet_delivered_ce = ReadUnaligned<uint32_t>(val);
168         break;
169       }
170       case TCP_NLA_BYTES_RETRANS: {
171         metrics->data_retx = ReadUnaligned<uint64_t>(val);
172         break;
173       }
174       case TCP_NLA_DSACK_DUPS: {
175         metrics->packet_spurious_retx = ReadUnaligned<uint32_t>(val);
176         break;
177       }
178       case TCP_NLA_REORDERING: {
179         metrics->reordering = ReadUnaligned<uint32_t>(val);
180         break;
181       }
182       case TCP_NLA_SND_SSTHRESH: {
183         metrics->snd_ssthresh = ReadUnaligned<uint32_t>(val);
184         break;
185       }
186     }
187     offset += NLA_ALIGN(attr->nla_len);
188   }
189 }
190 
GetSocketTcpInfo(struct tcp_info * info,int fd)191 int GetSocketTcpInfo(struct tcp_info* info, int fd) {
192   memset(info, 0, sizeof(*info));
193   info->length = offsetof(tcp_info, length);
194   return getsockopt(fd, IPPROTO_TCP, TCP_INFO, info, &(info->length));
195 }
196 
197 }  // namespace.
198 
Finished(gpr_timespec ts)199 bool TracedBufferList::TracedBuffer::Finished(gpr_timespec ts) {
200   constexpr int kGrpcMaxPendingAckTimeMillis = 10000;
201   return gpr_time_to_millis(gpr_time_sub(ts, last_timestamp_)) >
202          kGrpcMaxPendingAckTimeMillis;
203 }
204 
AddNewEntry(int32_t seq_no,int fd,void * arg)205 void TracedBufferList::AddNewEntry(int32_t seq_no, int fd, void* arg) {
206   TracedBuffer* new_elem = new TracedBuffer(seq_no, arg);
207   // Store the current time as the sendmsg time.
208   new_elem->ts_.sendmsg_time.time = gpr_now(GPR_CLOCK_REALTIME);
209   new_elem->ts_.scheduled_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
210   new_elem->ts_.sent_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
211   new_elem->ts_.acked_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
212   if (GetSocketTcpInfo(&(new_elem->ts_.info), fd) == 0) {
213     ExtractOptStatsFromTcpInfo(&(new_elem->ts_.sendmsg_time.metrics),
214                                &(new_elem->ts_.info));
215   }
216   new_elem->last_timestamp_ = new_elem->ts_.sendmsg_time.time;
217   MutexLock lock(&mu_);
218   if (!head_) {
219     head_ = tail_ = new_elem;
220   } else {
221     tail_->next_ = new_elem;
222     tail_ = new_elem;
223   }
224 }
225 
ProcessTimestamp(struct sock_extended_err * serr,struct cmsghdr * opt_stats,struct scm_timestamping * tss)226 void TracedBufferList::ProcessTimestamp(struct sock_extended_err* serr,
227                                         struct cmsghdr* opt_stats,
228                                         struct scm_timestamping* tss) {
229   MutexLock lock(&mu_);
230   TracedBuffer* elem = head_;
231   TracedBuffer* prev = nullptr;
232   while (elem != nullptr) {
233     // The byte number refers to the sequence number of the last byte which this
234     // timestamp relates to.
235     if (serr->ee_data >= elem->seq_no_) {
236       switch (serr->ee_info) {
237         case SCM_TSTAMP_SCHED:
238           FillGprFromTimestamp(&(elem->ts_.scheduled_time.time), &(tss->ts[0]));
239           ExtractOptStatsFromCmsg(&(elem->ts_.scheduled_time.metrics),
240                                   opt_stats);
241           elem->last_timestamp_ = elem->ts_.scheduled_time.time;
242           elem = elem->next_;
243           break;
244         case SCM_TSTAMP_SND:
245           FillGprFromTimestamp(&(elem->ts_.sent_time.time), &(tss->ts[0]));
246           ExtractOptStatsFromCmsg(&(elem->ts_.sent_time.metrics), opt_stats);
247           elem->last_timestamp_ = elem->ts_.sent_time.time;
248           elem = elem->next_;
249           break;
250         case SCM_TSTAMP_ACK:
251           FillGprFromTimestamp(&(elem->ts_.acked_time.time), &(tss->ts[0]));
252           ExtractOptStatsFromCmsg(&(elem->ts_.acked_time.metrics), opt_stats);
253           // Got all timestamps. Do the callback and free this TracedBuffer. The
254           // thing below can be passed by value if we don't want the restriction
255           // on the lifetime.
256           g_timestamps_callback(elem->arg_, &(elem->ts_), absl::OkStatus());
257           // Safe to update head_ to elem->next_ because the list is ordered by
258           // seq_no. Thus if elem is to be deleted, it has to be the first
259           // element in the list.
260           head_ = elem->next_;
261           delete elem;
262           elem = head_;
263           break;
264         default:
265           abort();
266       }
267     } else {
268       break;
269     }
270   }
271 
272   elem = head_;
273   gpr_timespec now = gpr_now(GPR_CLOCK_REALTIME);
274   while (elem != nullptr) {
275     if (!elem->Finished(now)) {
276       prev = elem;
277       elem = elem->next_;
278       continue;
279     }
280     g_timestamps_callback(elem->arg_, &(elem->ts_),
281                           absl::DeadlineExceededError("Ack timed out"));
282     if (prev != nullptr) {
283       prev->next_ = elem->next_;
284       delete elem;
285       elem = prev->next_;
286     } else {
287       head_ = elem->next_;
288       delete elem;
289       elem = head_;
290     }
291   }
292   tail_ = (head_ == nullptr) ? head_ : prev;
293 }
294 
Shutdown(void * remaining,absl::Status shutdown_err)295 void TracedBufferList::Shutdown(void* remaining, absl::Status shutdown_err) {
296   MutexLock lock(&mu_);
297   while (head_) {
298     TracedBuffer* elem = head_;
299     g_timestamps_callback(elem->arg_, &(elem->ts_), shutdown_err);
300     head_ = head_->next_;
301     delete elem;
302   }
303   if (remaining != nullptr) {
304     g_timestamps_callback(remaining, nullptr, shutdown_err);
305   }
306   tail_ = head_;
307 }
308 
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,Timestamps *,grpc_error_handle error))309 void grpc_tcp_set_write_timestamps_callback(
310     void (*fn)(void*, Timestamps*, grpc_error_handle error)) {
311   g_timestamps_callback = fn;
312 }
313 }  // namespace grpc_core
314 
315 #else  // GRPC_LINUX_ERRQUEUE
316 
317 namespace grpc_core {
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,Timestamps *,grpc_error_handle error))318 void grpc_tcp_set_write_timestamps_callback(
319     void (*fn)(void*, Timestamps*, grpc_error_handle error)) {
320   // Cast value of fn to void to avoid unused parameter warning.
321   // Can't comment out the name because some compilers and formatters don't
322   // like the sequence */* , which would arise from */*fn*/.
323   (void)fn;
324   VLOG(2) << "Timestamps callback is not enabled for this platform";
325 }
326 }  // namespace grpc_core
327 
328 #endif  // GRPC_LINUX_ERRQUEUE
329