1 /*
2 *
3 * Copyright 2019 gRPC authors.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19 #include <grpc/grpc.h>
20 #include <grpc/support/alloc.h>
21 #include <grpc/support/atm.h>
22 #include <grpc/support/log.h>
23 #include <grpc/support/port_platform.h>
24 #include <grpc/support/string_util.h>
25 #include <grpc/support/time.h>
26 #include <grpcpp/channel.h>
27 #include <grpcpp/client_context.h>
28 #include <grpcpp/create_channel.h>
29 #include <grpcpp/health_check_service_interface.h>
30 #include <grpcpp/server.h>
31 #include <grpcpp/server_builder.h>
32 #include <gtest/gtest.h>
33
34 #include <algorithm>
35 #include <condition_variable>
36 #include <memory>
37 #include <mutex>
38 #include <random>
39 #include <thread>
40
41 #include "src/core/lib/backoff/backoff.h"
42 #include "src/core/lib/gpr/env.h"
43 #include "src/proto/grpc/testing/echo.grpc.pb.h"
44 #include "test/core/util/debugger_macros.h"
45 #include "test/core/util/port.h"
46 #include "test/core/util/test_config.h"
47 #include "test/cpp/end2end/test_service_impl.h"
48 #include "test/cpp/util/test_credentials_provider.h"
49
50 #ifdef GPR_LINUX
51 using grpc::testing::EchoRequest;
52 using grpc::testing::EchoResponse;
53
54 namespace grpc {
55 namespace testing {
56 namespace {
57
// A single parameterization of the test suite: which credentials type to use
// and which message payload to send in each RPC.
struct TestScenario {
  // Copies both arguments into the scenario; the fields are immutable
  // afterwards.
  TestScenario(const std::string& creds_type, const std::string& content)
      : credentials_type{creds_type}, message_content{content} {}

  // Name of the credentials type handed to the credentials provider.
  const std::string credentials_type;
  // Payload placed in the echo request.
  const std::string message_content;
};
64
65 class FlakyNetworkTest : public ::testing::TestWithParam<TestScenario> {
66 protected:
FlakyNetworkTest()67 FlakyNetworkTest()
68 : server_host_("grpctest"),
69 interface_("lo:1"),
70 ipv4_address_("10.0.0.1"),
71 netmask_("/32") {}
72
InterfaceUp()73 void InterfaceUp() {
74 std::ostringstream cmd;
75 // create interface_ with address ipv4_address_
76 cmd << "ip addr add " << ipv4_address_ << netmask_ << " dev " << interface_;
77 std::system(cmd.str().c_str());
78 }
79
InterfaceDown()80 void InterfaceDown() {
81 std::ostringstream cmd;
82 // remove interface_
83 cmd << "ip addr del " << ipv4_address_ << netmask_ << " dev " << interface_;
84 std::system(cmd.str().c_str());
85 }
86
DNSUp()87 void DNSUp() {
88 std::ostringstream cmd;
89 // Add DNS entry for server_host_ in /etc/hosts
90 cmd << "echo '" << ipv4_address_ << " " << server_host_
91 << "' >> /etc/hosts";
92 std::system(cmd.str().c_str());
93 }
94
DNSDown()95 void DNSDown() {
96 std::ostringstream cmd;
97 // Remove DNS entry for server_host_ from /etc/hosts
98 // NOTE: we can't do this in one step with sed -i because when we are
99 // running under docker, the file is mounted by docker so we can't change
100 // its inode from within the container (sed -i creates a new file and
101 // replaces the old file, which changes the inode)
102 cmd << "sed '/" << server_host_ << "/d' /etc/hosts > /etc/hosts.orig";
103 std::system(cmd.str().c_str());
104
105 // clear the stream
106 cmd.str("");
107
108 cmd << "cat /etc/hosts.orig > /etc/hosts";
109 std::system(cmd.str().c_str());
110 }
111
DropPackets()112 void DropPackets() {
113 std::ostringstream cmd;
114 // drop packets with src IP = ipv4_address_
115 cmd << "iptables -A INPUT -s " << ipv4_address_ << " -j DROP";
116
117 std::system(cmd.str().c_str());
118 // clear the stream
119 cmd.str("");
120
121 // drop packets with dst IP = ipv4_address_
122 cmd << "iptables -A INPUT -d " << ipv4_address_ << " -j DROP";
123 }
124
RestoreNetwork()125 void RestoreNetwork() {
126 std::ostringstream cmd;
127 // remove iptables rule to drop packets with src IP = ipv4_address_
128 cmd << "iptables -D INPUT -s " << ipv4_address_ << " -j DROP";
129 std::system(cmd.str().c_str());
130 // clear the stream
131 cmd.str("");
132 // remove iptables rule to drop packets with dest IP = ipv4_address_
133 cmd << "iptables -D INPUT -d " << ipv4_address_ << " -j DROP";
134 }
135
FlakeNetwork()136 void FlakeNetwork() {
137 std::ostringstream cmd;
138 // Emulate a flaky network connection over interface_. Add a delay of 100ms
139 // +/- 20ms, 0.1% packet loss, 1% duplicates and 0.01% corrupt packets.
140 cmd << "tc qdisc replace dev " << interface_
141 << " root netem delay 100ms 20ms distribution normal loss 0.1% "
142 "duplicate "
143 "0.1% corrupt 0.01% ";
144 std::system(cmd.str().c_str());
145 }
146
UnflakeNetwork()147 void UnflakeNetwork() {
148 // Remove simulated network flake on interface_
149 std::ostringstream cmd;
150 cmd << "tc qdisc del dev " << interface_ << " root netem";
151 std::system(cmd.str().c_str());
152 }
153
NetworkUp()154 void NetworkUp() {
155 InterfaceUp();
156 DNSUp();
157 }
158
NetworkDown()159 void NetworkDown() {
160 InterfaceDown();
161 DNSDown();
162 }
163
SetUp()164 void SetUp() override {
165 NetworkUp();
166 grpc_init();
167 StartServer();
168 }
169
TearDown()170 void TearDown() override {
171 NetworkDown();
172 StopServer();
173 grpc_shutdown();
174 }
175
StartServer()176 void StartServer() {
177 // TODO (pjaikumar): Ideally, we should allocate the port dynamically using
178 // grpc_pick_unused_port_or_die(). That doesn't work inside some docker
179 // containers because port_server listens on localhost which maps to
180 // ip6-looopback, but ipv6 support is not enabled by default in docker.
181 port_ = SERVER_PORT;
182
183 server_.reset(new ServerData(port_, GetParam().credentials_type));
184 server_->Start(server_host_);
185 }
StopServer()186 void StopServer() { server_->Shutdown(); }
187
BuildStub(const std::shared_ptr<Channel> & channel)188 std::unique_ptr<grpc::testing::EchoTestService::Stub> BuildStub(
189 const std::shared_ptr<Channel>& channel) {
190 return grpc::testing::EchoTestService::NewStub(channel);
191 }
192
BuildChannel(const std::string & lb_policy_name,ChannelArguments args=ChannelArguments ())193 std::shared_ptr<Channel> BuildChannel(
194 const std::string& lb_policy_name,
195 ChannelArguments args = ChannelArguments()) {
196 if (lb_policy_name.size() > 0) {
197 args.SetLoadBalancingPolicyName(lb_policy_name);
198 } // else, default to pick first
199 auto channel_creds = GetCredentialsProvider()->GetChannelCredentials(
200 GetParam().credentials_type, &args);
201 std::ostringstream server_address;
202 server_address << server_host_ << ":" << port_;
203 return CreateCustomChannel(server_address.str(), channel_creds, args);
204 }
205
SendRpc(const std::unique_ptr<grpc::testing::EchoTestService::Stub> & stub,int timeout_ms=0,bool wait_for_ready=false)206 bool SendRpc(
207 const std::unique_ptr<grpc::testing::EchoTestService::Stub>& stub,
208 int timeout_ms = 0, bool wait_for_ready = false) {
209 auto response = std::unique_ptr<EchoResponse>(new EchoResponse());
210 EchoRequest request;
211 auto& msg = GetParam().message_content;
212 request.set_message(msg);
213 ClientContext context;
214 if (timeout_ms > 0) {
215 context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms));
216 // Allow an RPC to be canceled (for deadline exceeded) after it has
217 // reached the server.
218 request.mutable_param()->set_skip_cancelled_check(true);
219 }
220 // See https://github.com/grpc/grpc/blob/master/doc/wait-for-ready.md for
221 // details of wait-for-ready semantics
222 if (wait_for_ready) {
223 context.set_wait_for_ready(true);
224 }
225 Status status = stub->Echo(&context, request, response.get());
226 auto ok = status.ok();
227 int stream_id = 0;
228 grpc_call* call = context.c_call();
229 if (call) {
230 grpc_chttp2_stream* stream = grpc_chttp2_stream_from_call(call);
231 if (stream) {
232 stream_id = stream->id;
233 }
234 }
235 if (ok) {
236 gpr_log(GPR_DEBUG, "RPC with stream_id %d succeeded", stream_id);
237 } else {
238 gpr_log(GPR_DEBUG, "RPC with stream_id %d failed: %s", stream_id,
239 status.error_message().c_str());
240 }
241 return ok;
242 }
243
244 struct ServerData {
245 int port_;
246 const std::string creds_;
247 std::unique_ptr<Server> server_;
248 TestServiceImpl service_;
249 std::unique_ptr<std::thread> thread_;
250 bool server_ready_ = false;
251
ServerDatagrpc::testing::__anonbfb03ea20111::FlakyNetworkTest::ServerData252 ServerData(int port, const std::string& creds)
253 : port_(port), creds_(creds) {}
254
Startgrpc::testing::__anonbfb03ea20111::FlakyNetworkTest::ServerData255 void Start(const std::string& server_host) {
256 gpr_log(GPR_INFO, "starting server on port %d", port_);
257 std::mutex mu;
258 std::unique_lock<std::mutex> lock(mu);
259 std::condition_variable cond;
260 thread_.reset(new std::thread(
261 std::bind(&ServerData::Serve, this, server_host, &mu, &cond)));
262 cond.wait(lock, [this] { return server_ready_; });
263 server_ready_ = false;
264 gpr_log(GPR_INFO, "server startup complete");
265 }
266
Servegrpc::testing::__anonbfb03ea20111::FlakyNetworkTest::ServerData267 void Serve(const std::string& server_host, std::mutex* mu,
268 std::condition_variable* cond) {
269 std::ostringstream server_address;
270 server_address << server_host << ":" << port_;
271 ServerBuilder builder;
272 auto server_creds =
273 GetCredentialsProvider()->GetServerCredentials(creds_);
274 builder.AddListeningPort(server_address.str(), server_creds);
275 builder.RegisterService(&service_);
276 server_ = builder.BuildAndStart();
277 std::lock_guard<std::mutex> lock(*mu);
278 server_ready_ = true;
279 cond->notify_one();
280 }
281
Shutdowngrpc::testing::__anonbfb03ea20111::FlakyNetworkTest::ServerData282 void Shutdown() {
283 server_->Shutdown(grpc_timeout_milliseconds_to_deadline(0));
284 thread_->join();
285 }
286 };
287
WaitForChannelNotReady(Channel * channel,int timeout_seconds=5)288 bool WaitForChannelNotReady(Channel* channel, int timeout_seconds = 5) {
289 const gpr_timespec deadline =
290 grpc_timeout_seconds_to_deadline(timeout_seconds);
291 grpc_connectivity_state state;
292 while ((state = channel->GetState(false /* try_to_connect */)) ==
293 GRPC_CHANNEL_READY) {
294 if (!channel->WaitForStateChange(state, deadline)) return false;
295 }
296 return true;
297 }
298
WaitForChannelReady(Channel * channel,int timeout_seconds=5)299 bool WaitForChannelReady(Channel* channel, int timeout_seconds = 5) {
300 const gpr_timespec deadline =
301 grpc_timeout_seconds_to_deadline(timeout_seconds);
302 grpc_connectivity_state state;
303 while ((state = channel->GetState(true /* try_to_connect */)) !=
304 GRPC_CHANNEL_READY) {
305 if (!channel->WaitForStateChange(state, deadline)) return false;
306 }
307 return true;
308 }
309
310 private:
311 const std::string server_host_;
312 const std::string interface_;
313 const std::string ipv4_address_;
314 const std::string netmask_;
315 std::unique_ptr<grpc::testing::EchoTestService::Stub> stub_;
316 std::unique_ptr<ServerData> server_;
317 const int SERVER_PORT = 32750;
318 int port_;
319 };
320
CreateTestScenarios()321 std::vector<TestScenario> CreateTestScenarios() {
322 std::vector<TestScenario> scenarios;
323 std::vector<std::string> credentials_types;
324 std::vector<std::string> messages;
325
326 credentials_types.push_back(kInsecureCredentialsType);
327 auto sec_list = GetCredentialsProvider()->GetSecureCredentialsTypeList();
328 for (auto sec = sec_list.begin(); sec != sec_list.end(); sec++) {
329 credentials_types.push_back(*sec);
330 }
331
332 messages.push_back("");
333 for (size_t k = 1; k < GRPC_DEFAULT_MAX_RECV_MESSAGE_LENGTH / 1024; k *= 32) {
334 std::string big_msg;
335 for (size_t i = 0; i < k * 1024; ++i) {
336 char c = 'a' + (i % 26);
337 big_msg += c;
338 }
339 messages.push_back(big_msg);
340 }
341 for (auto cred = credentials_types.begin(); cred != credentials_types.end();
342 ++cred) {
343 for (auto msg = messages.begin(); msg != messages.end(); msg++) {
344 scenarios.emplace_back(*cred, *msg);
345 }
346 }
347
348 return scenarios;
349 }
350
351 INSTANTIATE_TEST_SUITE_P(FlakyNetworkTest, FlakyNetworkTest,
352 ::testing::ValuesIn(CreateTestScenarios()));
353
354 // Network interface connected to server flaps
TEST_P(FlakyNetworkTest,NetworkTransition)355 TEST_P(FlakyNetworkTest, NetworkTransition) {
356 const int kKeepAliveTimeMs = 1000;
357 const int kKeepAliveTimeoutMs = 1000;
358 ChannelArguments args;
359 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
360 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
361 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
362 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
363
364 auto channel = BuildChannel("pick_first", args);
365 auto stub = BuildStub(channel);
366 // Channel should be in READY state after we send an RPC
367 EXPECT_TRUE(SendRpc(stub));
368 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
369
370 std::atomic_bool shutdown{false};
371 std::thread sender = std::thread([this, &stub, &shutdown]() {
372 while (true) {
373 if (shutdown.load()) {
374 return;
375 }
376 SendRpc(stub);
377 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
378 }
379 });
380
381 // bring down network
382 NetworkDown();
383 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
384 // bring network interface back up
385 InterfaceUp();
386 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
387 // Restore DNS entry for server
388 DNSUp();
389 EXPECT_TRUE(WaitForChannelReady(channel.get()));
390 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
391 shutdown.store(true);
392 sender.join();
393 }
394
395 // Traffic to server server is blackholed temporarily with keepalives enabled
TEST_P(FlakyNetworkTest,ServerUnreachableWithKeepalive)396 TEST_P(FlakyNetworkTest, ServerUnreachableWithKeepalive) {
397 const int kKeepAliveTimeMs = 1000;
398 const int kKeepAliveTimeoutMs = 1000;
399 const int kReconnectBackoffMs = 1000;
400 ChannelArguments args;
401 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
402 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
403 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
404 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
405 // max time for a connection attempt
406 args.SetInt(GRPC_ARG_MIN_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);
407 // max time between reconnect attempts
408 args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);
409
410 gpr_log(GPR_DEBUG, "FlakyNetworkTest.ServerUnreachableWithKeepalive start");
411 auto channel = BuildChannel("pick_first", args);
412 auto stub = BuildStub(channel);
413 // Channel should be in READY state after we send an RPC
414 EXPECT_TRUE(SendRpc(stub));
415 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
416
417 std::atomic_bool shutdown{false};
418 std::thread sender = std::thread([this, &stub, &shutdown]() {
419 while (true) {
420 if (shutdown.load()) {
421 return;
422 }
423 SendRpc(stub);
424 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
425 }
426 });
427
428 // break network connectivity
429 gpr_log(GPR_DEBUG, "Adding iptables rule to drop packets");
430 DropPackets();
431 std::this_thread::sleep_for(std::chrono::milliseconds(10000));
432 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
433 // bring network interface back up
434 RestoreNetwork();
435 gpr_log(GPR_DEBUG, "Removed iptables rule to drop packets");
436 EXPECT_TRUE(WaitForChannelReady(channel.get()));
437 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
438 shutdown.store(true);
439 sender.join();
440 gpr_log(GPR_DEBUG, "FlakyNetworkTest.ServerUnreachableWithKeepalive end");
441 }
442
443 //
444 // Traffic to server server is blackholed temporarily with keepalives disabled
TEST_P(FlakyNetworkTest,ServerUnreachableNoKeepalive)445 TEST_P(FlakyNetworkTest, ServerUnreachableNoKeepalive) {
446 auto channel = BuildChannel("pick_first", ChannelArguments());
447 auto stub = BuildStub(channel);
448 // Channel should be in READY state after we send an RPC
449 EXPECT_TRUE(SendRpc(stub));
450 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
451
452 // break network connectivity
453 DropPackets();
454
455 std::thread sender = std::thread([this, &stub]() {
456 // RPC with deadline should timeout
457 EXPECT_FALSE(SendRpc(stub, /*timeout_ms=*/500, /*wait_for_ready=*/true));
458 // RPC without deadline forever until call finishes
459 EXPECT_TRUE(SendRpc(stub, /*timeout_ms=*/0, /*wait_for_ready=*/true));
460 });
461
462 std::this_thread::sleep_for(std::chrono::milliseconds(2000));
463 // bring network interface back up
464 RestoreNetwork();
465
466 // wait for RPC to finish
467 sender.join();
468 }
469
470 // Send RPCs over a flaky network connection
TEST_P(FlakyNetworkTest,FlakyNetwork)471 TEST_P(FlakyNetworkTest, FlakyNetwork) {
472 const int kKeepAliveTimeMs = 1000;
473 const int kKeepAliveTimeoutMs = 1000;
474 const int kMessageCount = 100;
475 ChannelArguments args;
476 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
477 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
478 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
479 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
480
481 auto channel = BuildChannel("pick_first", args);
482 auto stub = BuildStub(channel);
483 // Channel should be in READY state after we send an RPC
484 EXPECT_TRUE(SendRpc(stub));
485 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
486
487 // simulate flaky network (packet loss, corruption and delays)
488 FlakeNetwork();
489 for (int i = 0; i < kMessageCount; ++i) {
490 SendRpc(stub);
491 }
492 // remove network flakiness
493 UnflakeNetwork();
494 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
495 }
496
497 // Server is shutdown gracefully and restarted. Client keepalives are enabled
TEST_P(FlakyNetworkTest,ServerRestartKeepaliveEnabled)498 TEST_P(FlakyNetworkTest, ServerRestartKeepaliveEnabled) {
499 const int kKeepAliveTimeMs = 1000;
500 const int kKeepAliveTimeoutMs = 1000;
501 ChannelArguments args;
502 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
503 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
504 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
505 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
506
507 auto channel = BuildChannel("pick_first", args);
508 auto stub = BuildStub(channel);
509 // Channel should be in READY state after we send an RPC
510 EXPECT_TRUE(SendRpc(stub));
511 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
512
513 // server goes down, client should detect server going down and calls should
514 // fail
515 StopServer();
516 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
517 EXPECT_FALSE(SendRpc(stub));
518
519 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
520
521 // server restarts, calls succeed
522 StartServer();
523 EXPECT_TRUE(WaitForChannelReady(channel.get()));
524 // EXPECT_TRUE(SendRpc(stub));
525 }
526
527 // Server is shutdown gracefully and restarted. Client keepalives are enabled
TEST_P(FlakyNetworkTest,ServerRestartKeepaliveDisabled)528 TEST_P(FlakyNetworkTest, ServerRestartKeepaliveDisabled) {
529 auto channel = BuildChannel("pick_first", ChannelArguments());
530 auto stub = BuildStub(channel);
531 // Channel should be in READY state after we send an RPC
532 EXPECT_TRUE(SendRpc(stub));
533 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
534
535 // server sends GOAWAY when it's shutdown, so client attempts to reconnect
536 StopServer();
537 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
538
539 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
540
541 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
542
543 // server restarts, calls succeed
544 StartServer();
545 EXPECT_TRUE(WaitForChannelReady(channel.get()));
546 }
547
548 } // namespace
549 } // namespace testing
550 } // namespace grpc
551 #endif // GPR_LINUX
552
main(int argc,char ** argv)553 int main(int argc, char** argv) {
554 ::testing::InitGoogleTest(&argc, argv);
555 grpc::testing::TestEnvironment env(argc, argv);
556 auto result = RUN_ALL_TESTS();
557 return result;
558 }
559