• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright 2016 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 /* Benchmark gRPC end2end in various configurations */
20 
21 #include <benchmark/benchmark.h>
22 #include <gflags/gflags.h>
23 #include <fstream>
24 
25 #include "src/core/ext/transport/chttp2/transport/chttp2_transport.h"
26 #include "src/core/ext/transport/chttp2/transport/internal.h"
27 #include "src/core/lib/iomgr/timer_manager.h"
28 #include "src/core/lib/profiling/timers.h"
29 #include "src/proto/grpc/testing/echo.grpc.pb.h"
30 #include "test/core/util/test_config.h"
31 #include "test/core/util/trickle_endpoint.h"
32 #include "test/cpp/microbenchmarks/fullstack_context_mutators.h"
33 #include "test/cpp/microbenchmarks/fullstack_fixtures.h"
34 #include "test/cpp/util/test_config.h"
35 
36 DEFINE_bool(log, false, "Log state to CSV files");
37 DEFINE_int32(
38     warmup_megabytes, 1,
39     "Number of megabytes to pump before collecting flow control stats");
40 DEFINE_int32(
41     warmup_iterations, 100,
42     "Number of iterations to run before collecting flow control stats");
43 DEFINE_int32(warmup_max_time_seconds, 10,
44              "Maximum number of seconds to run warmup loop");
45 
46 namespace grpc {
47 namespace testing {
48 
49 gpr_atm g_now_us = 0;
50 
fake_now(gpr_clock_type clock_type)51 static gpr_timespec fake_now(gpr_clock_type clock_type) {
52   gpr_timespec t;
53   gpr_atm now = gpr_atm_no_barrier_load(&g_now_us);
54   t.tv_sec = now / GPR_US_PER_SEC;
55   t.tv_nsec = (now % GPR_US_PER_SEC) * GPR_NS_PER_US;
56   t.clock_type = clock_type;
57   return t;
58 }
59 
inc_time()60 static void inc_time() {
61   gpr_atm_no_barrier_fetch_add(&g_now_us, 100);
62   grpc_timer_manager_tick();
63 }
64 
// Converts a small integer into the opaque void* tag form passed to
// completion-queue operations.
static void* tag(intptr_t x) {
  void* const as_pointer = reinterpret_cast<void*>(x);
  return as_pointer;
}
66 
// Terminal overload: writes the last field of a CSV row followed by a newline.
// A null `out` makes the call a no-op.
template <class A0>
static void write_csv(std::ostream* out, A0&& a0) {
  if (out != nullptr) {
    std::ostream& sink = *out;
    sink << a0;
    sink << "\n";
  }
}

// Recursive overload: writes the first field and a comma, then forwards the
// remaining fields to write_csv until the terminal overload ends the row.
// A null `out` makes the call a no-op.
template <class A0, class... Arg>
static void write_csv(std::ostream* out, A0&& a0, Arg&&... arg) {
  if (out != nullptr) {
    std::ostream& sink = *out;
    sink << a0;
    sink << ",";
    write_csv(out, std::forward<Arg>(arg)...);
  }
}
79 
80 class TrickledCHTTP2 : public EndpointPairFixture {
81  public:
TrickledCHTTP2(Service * service,bool streaming,size_t req_size,size_t resp_size,size_t kilobits_per_second,grpc_passthru_endpoint_stats * stats)82   TrickledCHTTP2(Service* service, bool streaming, size_t req_size,
83                  size_t resp_size, size_t kilobits_per_second,
84                  grpc_passthru_endpoint_stats* stats)
85       : EndpointPairFixture(service, MakeEndpoints(kilobits_per_second, stats),
86                             FixtureConfiguration()),
87         stats_(stats) {
88     if (FLAGS_log) {
89       std::ostringstream fn;
90       fn << "trickle." << (streaming ? "streaming" : "unary") << "." << req_size
91          << "." << resp_size << "." << kilobits_per_second << ".csv";
92       log_.reset(new std::ofstream(fn.str().c_str()));
93       write_csv(log_.get(), "t", "iteration", "client_backlog",
94                 "server_backlog", "client_t_stall", "client_s_stall",
95                 "server_t_stall", "server_s_stall", "client_t_remote",
96                 "server_t_remote", "client_t_announced", "server_t_announced",
97                 "client_s_remote_delta", "server_s_remote_delta",
98                 "client_s_local_delta", "server_s_local_delta",
99                 "client_s_announced_delta", "server_s_announced_delta",
100                 "client_peer_iws", "client_local_iws", "client_sent_iws",
101                 "client_acked_iws", "server_peer_iws", "server_local_iws",
102                 "server_sent_iws", "server_acked_iws", "client_queued_bytes",
103                 "server_queued_bytes");
104     }
105   }
106 
~TrickledCHTTP2()107   virtual ~TrickledCHTTP2() {
108     if (stats_ != nullptr) {
109       grpc_passthru_endpoint_stats_destroy(stats_);
110     }
111   }
112 
AddToLabel(std::ostream & out,benchmark::State & state)113   void AddToLabel(std::ostream& out, benchmark::State& state) {
114     out << " writes/iter:"
115         << ((double)stats_->num_writes / (double)state.iterations())
116         << " cli_transport_stalls/iter:"
117         << ((double)
118                 client_stats_.streams_stalled_due_to_transport_flow_control /
119             (double)state.iterations())
120         << " cli_stream_stalls/iter:"
121         << ((double)client_stats_.streams_stalled_due_to_stream_flow_control /
122             (double)state.iterations())
123         << " svr_transport_stalls/iter:"
124         << ((double)
125                 server_stats_.streams_stalled_due_to_transport_flow_control /
126             (double)state.iterations())
127         << " svr_stream_stalls/iter:"
128         << ((double)server_stats_.streams_stalled_due_to_stream_flow_control /
129             (double)state.iterations());
130   }
131 
Log(int64_t iteration)132   void Log(int64_t iteration) GPR_ATTRIBUTE_NO_TSAN {
133     auto now = gpr_time_sub(gpr_now(GPR_CLOCK_MONOTONIC), start_);
134     grpc_chttp2_transport* client =
135         reinterpret_cast<grpc_chttp2_transport*>(client_transport_);
136     grpc_chttp2_transport* server =
137         reinterpret_cast<grpc_chttp2_transport*>(server_transport_);
138     grpc_chttp2_stream* client_stream =
139         client->stream_map.count == 1
140             ? static_cast<grpc_chttp2_stream*>(client->stream_map.values[0])
141             : nullptr;
142     grpc_chttp2_stream* server_stream =
143         server->stream_map.count == 1
144             ? static_cast<grpc_chttp2_stream*>(server->stream_map.values[0])
145             : nullptr;
146     write_csv(
147         log_.get(),
148         static_cast<double>(now.tv_sec) +
149             1e-9 * static_cast<double>(now.tv_nsec),
150         iteration, grpc_trickle_get_backlog(endpoint_pair_.client),
151         grpc_trickle_get_backlog(endpoint_pair_.server),
152         client->lists[GRPC_CHTTP2_LIST_STALLED_BY_TRANSPORT].head != nullptr,
153         client->lists[GRPC_CHTTP2_LIST_STALLED_BY_STREAM].head != nullptr,
154         server->lists[GRPC_CHTTP2_LIST_STALLED_BY_TRANSPORT].head != nullptr,
155         server->lists[GRPC_CHTTP2_LIST_STALLED_BY_STREAM].head != nullptr,
156         client->flow_control->remote_window_,
157         server->flow_control->remote_window_,
158         client->flow_control->announced_window_,
159         server->flow_control->announced_window_,
160         client_stream ? client_stream->flow_control->remote_window_delta_ : -1,
161         server_stream ? server_stream->flow_control->remote_window_delta_ : -1,
162         client_stream ? client_stream->flow_control->local_window_delta_ : -1,
163         server_stream ? server_stream->flow_control->local_window_delta_ : -1,
164         client_stream ? client_stream->flow_control->announced_window_delta_
165                       : -1,
166         server_stream ? server_stream->flow_control->announced_window_delta_
167                       : -1,
168         client->settings[GRPC_PEER_SETTINGS]
169                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
170         client->settings[GRPC_LOCAL_SETTINGS]
171                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
172         client->settings[GRPC_SENT_SETTINGS]
173                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
174         client->settings[GRPC_ACKED_SETTINGS]
175                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
176         server->settings[GRPC_PEER_SETTINGS]
177                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
178         server->settings[GRPC_LOCAL_SETTINGS]
179                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
180         server->settings[GRPC_SENT_SETTINGS]
181                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
182         server->settings[GRPC_ACKED_SETTINGS]
183                         [GRPC_CHTTP2_SETTINGS_INITIAL_WINDOW_SIZE],
184         client_stream ? client_stream->flow_controlled_buffer.length : 0,
185         server_stream ? server_stream->flow_controlled_buffer.length : 0);
186   }
187 
Step(bool update_stats)188   void Step(bool update_stats) {
189     grpc_core::ExecCtx exec_ctx;
190     inc_time();
191     size_t client_backlog =
192         grpc_trickle_endpoint_trickle(endpoint_pair_.client);
193     size_t server_backlog =
194         grpc_trickle_endpoint_trickle(endpoint_pair_.server);
195 
196     if (update_stats) {
197       UpdateStats((grpc_chttp2_transport*)client_transport_, &client_stats_,
198                   client_backlog);
199       UpdateStats((grpc_chttp2_transport*)server_transport_, &server_stats_,
200                   server_backlog);
201     }
202   }
203 
204  private:
205   grpc_passthru_endpoint_stats* stats_;
206   struct Stats {
207     int streams_stalled_due_to_stream_flow_control = 0;
208     int streams_stalled_due_to_transport_flow_control = 0;
209   };
210   Stats client_stats_;
211   Stats server_stats_;
212   std::unique_ptr<std::ofstream> log_;
213   gpr_timespec start_ = gpr_now(GPR_CLOCK_MONOTONIC);
214 
MakeEndpoints(size_t kilobits,grpc_passthru_endpoint_stats * stats)215   static grpc_endpoint_pair MakeEndpoints(size_t kilobits,
216                                           grpc_passthru_endpoint_stats* stats) {
217     grpc_endpoint_pair p;
218     grpc_passthru_endpoint_create(&p.client, &p.server,
219                                   LibraryInitializer::get().rq(), stats);
220     double bytes_per_second = 125.0 * kilobits;
221     p.client = grpc_trickle_endpoint_create(p.client, bytes_per_second);
222     p.server = grpc_trickle_endpoint_create(p.server, bytes_per_second);
223     return p;
224   }
225 
UpdateStats(grpc_chttp2_transport * t,Stats * s,size_t backlog)226   void UpdateStats(grpc_chttp2_transport* t, Stats* s,
227                    size_t backlog) GPR_ATTRIBUTE_NO_TSAN {
228     if (backlog == 0) {
229       if (t->lists[GRPC_CHTTP2_LIST_STALLED_BY_STREAM].head != nullptr) {
230         s->streams_stalled_due_to_stream_flow_control++;
231       }
232       if (t->lists[GRPC_CHTTP2_LIST_STALLED_BY_TRANSPORT].head != nullptr) {
233         s->streams_stalled_due_to_transport_flow_control++;
234       }
235     }
236   }
237 };
238 
TrickleCQNext(TrickledCHTTP2 * fixture,void ** t,bool * ok,int64_t iteration)239 static void TrickleCQNext(TrickledCHTTP2* fixture, void** t, bool* ok,
240                           int64_t iteration) {
241   while (true) {
242     fixture->Log(iteration);
243     switch (
244         fixture->cq()->AsyncNext(t, ok, gpr_inf_past(GPR_CLOCK_MONOTONIC))) {
245       case CompletionQueue::TIMEOUT:
246         fixture->Step(iteration != -1);
247         break;
248       case CompletionQueue::SHUTDOWN:
249         GPR_ASSERT(false);
250         break;
251       case CompletionQueue::GOT_EVENT:
252         return;
253     }
254   }
255 }
256 
BM_PumpStreamServerToClient_Trickle(benchmark::State & state)257 static void BM_PumpStreamServerToClient_Trickle(benchmark::State& state) {
258   EchoTestService::AsyncService service;
259   std::unique_ptr<TrickledCHTTP2> fixture(new TrickledCHTTP2(
260       &service, true, state.range(0) /* req_size */,
261       state.range(0) /* resp_size */, state.range(1) /* bw in kbit/s */,
262       grpc_passthru_endpoint_stats_create()));
263   {
264     EchoResponse send_response;
265     EchoResponse recv_response;
266     if (state.range(0) > 0) {
267       send_response.set_message(std::string(state.range(0), 'a'));
268     }
269     Status recv_status;
270     ServerContext svr_ctx;
271     ServerAsyncReaderWriter<EchoResponse, EchoRequest> response_rw(&svr_ctx);
272     service.RequestBidiStream(&svr_ctx, &response_rw, fixture->cq(),
273                               fixture->cq(), tag(0));
274     std::unique_ptr<EchoTestService::Stub> stub(
275         EchoTestService::NewStub(fixture->channel()));
276     ClientContext cli_ctx;
277     auto request_rw = stub->AsyncBidiStream(&cli_ctx, fixture->cq(), tag(1));
278     int need_tags = (1 << 0) | (1 << 1);
279     void* t;
280     bool ok;
281     while (need_tags) {
282       TrickleCQNext(fixture.get(), &t, &ok, -1);
283       GPR_ASSERT(ok);
284       int i = (int)(intptr_t)t;
285       GPR_ASSERT(need_tags & (1 << i));
286       need_tags &= ~(1 << i);
287     }
288     request_rw->Read(&recv_response, tag(0));
289     auto inner_loop = [&](bool in_warmup) {
290       GPR_TIMER_SCOPE("BenchmarkCycle", 0);
291       response_rw.Write(send_response, tag(1));
292       while (true) {
293         TrickleCQNext(fixture.get(), &t, &ok,
294                       in_warmup ? -1 : state.iterations());
295         if (t == tag(0)) {
296           request_rw->Read(&recv_response, tag(0));
297         } else if (t == tag(1)) {
298           break;
299         } else {
300           GPR_ASSERT(false);
301         }
302       }
303     };
304     gpr_timespec warmup_start = gpr_now(GPR_CLOCK_MONOTONIC);
305     for (int i = 0;
306          i < GPR_MAX(FLAGS_warmup_iterations, FLAGS_warmup_megabytes * 1024 *
307                                                   1024 / (14 + state.range(0)));
308          i++) {
309       inner_loop(true);
310       if (gpr_time_cmp(gpr_time_sub(gpr_now(GPR_CLOCK_MONOTONIC), warmup_start),
311                        gpr_time_from_seconds(FLAGS_warmup_max_time_seconds,
312                                              GPR_TIMESPAN)) > 0) {
313         break;
314       }
315     }
316     while (state.KeepRunning()) {
317       inner_loop(false);
318     }
319     response_rw.Finish(Status::OK, tag(1));
320     grpc::Status status;
321     request_rw->Finish(&status, tag(2));
322     need_tags = (1 << 0) | (1 << 1) | (1 << 2);
323     while (need_tags) {
324       TrickleCQNext(fixture.get(), &t, &ok, -1);
325       if (t == tag(0) && ok) {
326         request_rw->Read(&recv_response, tag(0));
327         continue;
328       }
329       int i = (int)(intptr_t)t;
330       GPR_ASSERT(need_tags & (1 << i));
331       need_tags &= ~(1 << i);
332     }
333   }
334   fixture->Finish(state);
335   fixture.reset();
336   state.SetBytesProcessed(state.range(0) * state.iterations());
337 }
338 
StreamingTrickleArgs(benchmark::internal::Benchmark * b)339 static void StreamingTrickleArgs(benchmark::internal::Benchmark* b) {
340   for (int i = 1; i <= 128 * 1024 * 1024; i *= 8) {
341     for (int j = 64; j <= 128 * 1024 * 1024; j *= 8) {
342       double expected_time =
343           static_cast<double>(14 + i) / (125.0 * static_cast<double>(j));
344       if (expected_time > 2.0) continue;
345       b->Args({i, j});
346     }
347   }
348 }
349 BENCHMARK(BM_PumpStreamServerToClient_Trickle)->Apply(StreamingTrickleArgs);
350 
BM_PumpUnbalancedUnary_Trickle(benchmark::State & state)351 static void BM_PumpUnbalancedUnary_Trickle(benchmark::State& state) {
352   EchoTestService::AsyncService service;
353   std::unique_ptr<TrickledCHTTP2> fixture(new TrickledCHTTP2(
354       &service, false, state.range(0) /* req_size */,
355       state.range(1) /* resp_size */, state.range(2) /* bw in kbit/s */,
356       grpc_passthru_endpoint_stats_create()));
357   EchoRequest send_request;
358   EchoResponse send_response;
359   EchoResponse recv_response;
360   if (state.range(0) > 0) {
361     send_request.set_message(std::string(state.range(0), 'a'));
362   }
363   if (state.range(1) > 0) {
364     send_response.set_message(std::string(state.range(1), 'a'));
365   }
366   Status recv_status;
367   struct ServerEnv {
368     ServerContext ctx;
369     EchoRequest recv_request;
370     grpc::ServerAsyncResponseWriter<EchoResponse> response_writer;
371     ServerEnv() : response_writer(&ctx) {}
372   };
373   uint8_t server_env_buffer[2 * sizeof(ServerEnv)];
374   ServerEnv* server_env[2] = {
375       reinterpret_cast<ServerEnv*>(server_env_buffer),
376       reinterpret_cast<ServerEnv*>(server_env_buffer + sizeof(ServerEnv))};
377   new (server_env[0]) ServerEnv;
378   new (server_env[1]) ServerEnv;
379   service.RequestEcho(&server_env[0]->ctx, &server_env[0]->recv_request,
380                       &server_env[0]->response_writer, fixture->cq(),
381                       fixture->cq(), tag(0));
382   service.RequestEcho(&server_env[1]->ctx, &server_env[1]->recv_request,
383                       &server_env[1]->response_writer, fixture->cq(),
384                       fixture->cq(), tag(1));
385   std::unique_ptr<EchoTestService::Stub> stub(
386       EchoTestService::NewStub(fixture->channel()));
387   auto inner_loop = [&](bool in_warmup) {
388     GPR_TIMER_SCOPE("BenchmarkCycle", 0);
389     recv_response.Clear();
390     ClientContext cli_ctx;
391     std::unique_ptr<ClientAsyncResponseReader<EchoResponse>> response_reader(
392         stub->AsyncEcho(&cli_ctx, send_request, fixture->cq()));
393     void* t;
394     bool ok;
395     response_reader->Finish(&recv_response, &recv_status, tag(4));
396     TrickleCQNext(fixture.get(), &t, &ok, in_warmup ? -1 : state.iterations());
397     GPR_ASSERT(ok);
398     GPR_ASSERT(t == tag(0) || t == tag(1));
399     intptr_t slot = reinterpret_cast<intptr_t>(t);
400     ServerEnv* senv = server_env[slot];
401     senv->response_writer.Finish(send_response, Status::OK, tag(3));
402     for (int i = (1 << 3) | (1 << 4); i != 0;) {
403       TrickleCQNext(fixture.get(), &t, &ok,
404                     in_warmup ? -1 : state.iterations());
405       GPR_ASSERT(ok);
406       int tagnum = (int)reinterpret_cast<intptr_t>(t);
407       GPR_ASSERT(i & (1 << tagnum));
408       i -= 1 << tagnum;
409     }
410     GPR_ASSERT(recv_status.ok());
411 
412     senv->~ServerEnv();
413     senv = new (senv) ServerEnv();
414     service.RequestEcho(&senv->ctx, &senv->recv_request, &senv->response_writer,
415                         fixture->cq(), fixture->cq(), tag(slot));
416   };
417   gpr_timespec warmup_start = gpr_now(GPR_CLOCK_MONOTONIC);
418   for (int i = 0;
419        i < GPR_MAX(FLAGS_warmup_iterations, FLAGS_warmup_megabytes * 1024 *
420                                                 1024 / (14 + state.range(0)));
421        i++) {
422     inner_loop(true);
423     if (gpr_time_cmp(gpr_time_sub(gpr_now(GPR_CLOCK_MONOTONIC), warmup_start),
424                      gpr_time_from_seconds(FLAGS_warmup_max_time_seconds,
425                                            GPR_TIMESPAN)) > 0) {
426       break;
427     }
428   }
429   while (state.KeepRunning()) {
430     inner_loop(false);
431   }
432   fixture->Finish(state);
433   fixture.reset();
434   server_env[0]->~ServerEnv();
435   server_env[1]->~ServerEnv();
436   state.SetBytesProcessed(state.range(0) * state.iterations() +
437                           state.range(1) * state.iterations());
438 }
439 
UnaryTrickleArgs(benchmark::internal::Benchmark * b)440 static void UnaryTrickleArgs(benchmark::internal::Benchmark* b) {
441   for (int bw = 64; bw <= 128 * 1024 * 1024; bw *= 16) {
442     b->Args({1, 1, bw});
443     for (int i = 64; i <= 128 * 1024 * 1024; i *= 64) {
444       double expected_time =
445           static_cast<double>(14 + i) / (125.0 * static_cast<double>(bw));
446       if (expected_time > 2.0) continue;
447       b->Args({i, 1, bw});
448       b->Args({1, i, bw});
449       b->Args({i, i, bw});
450     }
451   }
452 }
453 BENCHMARK(BM_PumpUnbalancedUnary_Trickle)->Apply(UnaryTrickleArgs);
454 }  // namespace testing
455 }  // namespace grpc
456 
457 extern gpr_timespec (*gpr_now_impl)(gpr_clock_type clock_type);
458 
459 // Some distros have RunSpecifiedBenchmarks under the benchmark namespace,
460 // and others do not. This allows us to support both modes.
461 namespace benchmark {
RunTheBenchmarksNamespaced()462 void RunTheBenchmarksNamespaced() { RunSpecifiedBenchmarks(); }
463 }  // namespace benchmark
464 
main(int argc,char ** argv)465 int main(int argc, char** argv) {
466   grpc::testing::TestEnvironment env(argc, argv);
467   LibraryInitializer libInit;
468   ::benchmark::Initialize(&argc, argv);
469   ::grpc::testing::InitTest(&argc, &argv, false);
470   grpc_timer_manager_set_threading(false);
471   gpr_now_impl = ::grpc::testing::fake_now;
472   benchmark::RunTheBenchmarksNamespaced();
473 }
474