1 //
2 //
3 // Copyright 2019 gRPC authors.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 //
18
19 #include <grpc/grpc.h>
20 #include <grpc/support/alloc.h>
21 #include <grpc/support/atm.h>
22 #include <grpc/support/port_platform.h>
23 #include <grpc/support/string_util.h>
24 #include <grpc/support/time.h>
25 #include <grpcpp/channel.h>
26 #include <grpcpp/client_context.h>
27 #include <grpcpp/create_channel.h>
28 #include <grpcpp/health_check_service_interface.h>
29 #include <grpcpp/server.h>
30 #include <grpcpp/server_builder.h>
31 #include <gtest/gtest.h>
32
33 #include <algorithm>
34 #include <condition_variable>
35 #include <memory>
36 #include <mutex>
37 #include <random>
38 #include <thread>
39
40 #include "absl/log/log.h"
41 #include "absl/memory/memory.h"
42 #include "src/core/util/backoff.h"
43 #include "src/core/util/crash.h"
44 #include "src/core/util/env.h"
45 #include "src/proto/grpc/testing/echo.grpc.pb.h"
46 #include "test/core/test_util/port.h"
47 #include "test/core/test_util/test_config.h"
48 #include "test/cpp/end2end/test_service_impl.h"
49 #include "test/cpp/util/test_credentials_provider.h"
50
51 #ifdef GPR_LINUX
52
53 namespace grpc {
54 namespace testing {
55 namespace {
56
// One parameterization of the test suite: the credentials type used by both
// the channel and the server, and the payload carried by every Echo request.
struct TestScenario {
  const std::string credentials_type;
  const std::string message_content;

  TestScenario(const std::string& creds_type, const std::string& content)
      : credentials_type{creds_type}, message_content{content} {}
};
63
64 class FlakyNetworkTest : public ::testing::TestWithParam<TestScenario> {
65 protected:
FlakyNetworkTest()66 FlakyNetworkTest()
67 : server_host_("grpctest"),
68 interface_("lo:1"),
69 ipv4_address_("10.0.0.1"),
70 netmask_("/32") {}
71
InterfaceUp()72 void InterfaceUp() {
73 std::ostringstream cmd;
74 // create interface_ with address ipv4_address_
75 cmd << "ip addr add " << ipv4_address_ << netmask_ << " dev " << interface_;
76 std::system(cmd.str().c_str());
77 }
78
InterfaceDown()79 void InterfaceDown() {
80 std::ostringstream cmd;
81 // remove interface_
82 cmd << "ip addr del " << ipv4_address_ << netmask_ << " dev " << interface_;
83 std::system(cmd.str().c_str());
84 }
85
DNSUp()86 void DNSUp() {
87 std::ostringstream cmd;
88 // Add DNS entry for server_host_ in /etc/hosts
89 cmd << "echo '" << ipv4_address_ << " " << server_host_
90 << "' >> /etc/hosts";
91 std::system(cmd.str().c_str());
92 }
93
DNSDown()94 void DNSDown() {
95 std::ostringstream cmd;
96 // Remove DNS entry for server_host_ from /etc/hosts
97 // NOTE: we can't do this in one step with sed -i because when we are
98 // running under docker, the file is mounted by docker so we can't change
99 // its inode from within the container (sed -i creates a new file and
100 // replaces the old file, which changes the inode)
101 cmd << "sed '/" << server_host_ << "/d' /etc/hosts > /etc/hosts.orig";
102 std::system(cmd.str().c_str());
103
104 // clear the stream
105 cmd.str("");
106
107 cmd << "cat /etc/hosts.orig > /etc/hosts";
108 std::system(cmd.str().c_str());
109 }
110
DropPackets()111 void DropPackets() {
112 std::ostringstream cmd;
113 // drop packets with src IP = ipv4_address_
114 cmd << "iptables -A INPUT -s " << ipv4_address_ << " -j DROP";
115
116 std::system(cmd.str().c_str());
117 // clear the stream
118 cmd.str("");
119
120 // drop packets with dst IP = ipv4_address_
121 cmd << "iptables -A INPUT -d " << ipv4_address_ << " -j DROP";
122 }
123
RestoreNetwork()124 void RestoreNetwork() {
125 std::ostringstream cmd;
126 // remove iptables rule to drop packets with src IP = ipv4_address_
127 cmd << "iptables -D INPUT -s " << ipv4_address_ << " -j DROP";
128 std::system(cmd.str().c_str());
129 // clear the stream
130 cmd.str("");
131 // remove iptables rule to drop packets with dest IP = ipv4_address_
132 cmd << "iptables -D INPUT -d " << ipv4_address_ << " -j DROP";
133 }
134
FlakeNetwork()135 void FlakeNetwork() {
136 std::ostringstream cmd;
137 // Emulate a flaky network connection over interface_. Add a delay of 100ms
138 // +/- 20ms, 0.1% packet loss, 1% duplicates and 0.01% corrupt packets.
139 cmd << "tc qdisc replace dev " << interface_
140 << " root netem delay 100ms 20ms distribution normal loss 0.1% "
141 "duplicate "
142 "0.1% corrupt 0.01% ";
143 std::system(cmd.str().c_str());
144 }
145
UnflakeNetwork()146 void UnflakeNetwork() {
147 // Remove simulated network flake on interface_
148 std::ostringstream cmd;
149 cmd << "tc qdisc del dev " << interface_ << " root netem";
150 std::system(cmd.str().c_str());
151 }
152
NetworkUp()153 void NetworkUp() {
154 InterfaceUp();
155 DNSUp();
156 }
157
NetworkDown()158 void NetworkDown() {
159 InterfaceDown();
160 DNSDown();
161 }
162
SetUp()163 void SetUp() override {
164 NetworkUp();
165 grpc_init();
166 StartServer();
167 }
168
TearDown()169 void TearDown() override {
170 NetworkDown();
171 StopServer();
172 grpc_shutdown();
173 }
174
StartServer()175 void StartServer() {
176 // TODO (pjaikumar): Ideally, we should allocate the port dynamically using
177 // grpc_pick_unused_port_or_die(). That doesn't work inside some docker
178 // containers because port_server listens on localhost which maps to
179 // ip6-looopback, but ipv6 support is not enabled by default in docker.
180 port_ = SERVER_PORT;
181
182 server_ = std::make_unique<ServerData>(port_, GetParam().credentials_type);
183 server_->Start(server_host_);
184 }
StopServer()185 void StopServer() { server_->Shutdown(); }
186
BuildStub(const std::shared_ptr<Channel> & channel)187 std::unique_ptr<grpc::testing::EchoTestService::Stub> BuildStub(
188 const std::shared_ptr<Channel>& channel) {
189 return grpc::testing::EchoTestService::NewStub(channel);
190 }
191
BuildChannel(const std::string & lb_policy_name,ChannelArguments args=ChannelArguments ())192 std::shared_ptr<Channel> BuildChannel(
193 const std::string& lb_policy_name,
194 ChannelArguments args = ChannelArguments()) {
195 if (!lb_policy_name.empty()) {
196 args.SetLoadBalancingPolicyName(lb_policy_name);
197 } // else, default to pick first
198 auto channel_creds = GetCredentialsProvider()->GetChannelCredentials(
199 GetParam().credentials_type, &args);
200 std::ostringstream server_address;
201 server_address << server_host_ << ":" << port_;
202 return CreateCustomChannel(server_address.str(), channel_creds, args);
203 }
204
SendRpc(const std::unique_ptr<grpc::testing::EchoTestService::Stub> & stub,int timeout_ms=0,bool wait_for_ready=false)205 bool SendRpc(
206 const std::unique_ptr<grpc::testing::EchoTestService::Stub>& stub,
207 int timeout_ms = 0, bool wait_for_ready = false) {
208 auto response = std::make_unique<EchoResponse>();
209 EchoRequest request;
210 auto& msg = GetParam().message_content;
211 request.set_message(msg);
212 ClientContext context;
213 if (timeout_ms > 0) {
214 context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms));
215 // Allow an RPC to be canceled (for deadline exceeded) after it has
216 // reached the server.
217 request.mutable_param()->set_skip_cancelled_check(true);
218 }
219 // See https://github.com/grpc/grpc/blob/master/doc/wait-for-ready.md for
220 // details of wait-for-ready semantics
221 if (wait_for_ready) {
222 context.set_wait_for_ready(true);
223 }
224 Status status = stub->Echo(&context, request, response.get());
225 auto ok = status.ok();
226 if (ok) {
227 VLOG(2) << "RPC succeeded";
228 } else {
229 VLOG(2) << "RPC failed: " << status.error_message();
230 }
231 return ok;
232 }
233
234 struct ServerData {
235 int port_;
236 const std::string creds_;
237 std::unique_ptr<Server> server_;
238 TestServiceImpl service_;
239 std::unique_ptr<std::thread> thread_;
240 bool server_ready_ = false;
241
ServerDatagrpc::testing::__anon8b8736020111::FlakyNetworkTest::ServerData242 ServerData(int port, const std::string& creds)
243 : port_(port), creds_(creds) {}
244
Startgrpc::testing::__anon8b8736020111::FlakyNetworkTest::ServerData245 void Start(const std::string& server_host) {
246 LOG(INFO) << "starting server on port " << port_;
247 std::mutex mu;
248 std::unique_lock<std::mutex> lock(mu);
249 std::condition_variable cond;
250 thread_ = std::make_unique<std::thread>(
251 std::bind(&ServerData::Serve, this, server_host, &mu, &cond));
252 cond.wait(lock, [this] { return server_ready_; });
253 server_ready_ = false;
254 LOG(INFO) << "server startup complete";
255 }
256
Servegrpc::testing::__anon8b8736020111::FlakyNetworkTest::ServerData257 void Serve(const std::string& server_host, std::mutex* mu,
258 std::condition_variable* cond) {
259 std::ostringstream server_address;
260 server_address << server_host << ":" << port_;
261 ServerBuilder builder;
262 auto server_creds =
263 GetCredentialsProvider()->GetServerCredentials(creds_);
264 builder.AddListeningPort(server_address.str(), server_creds);
265 builder.RegisterService(&service_);
266 server_ = builder.BuildAndStart();
267 std::lock_guard<std::mutex> lock(*mu);
268 server_ready_ = true;
269 cond->notify_one();
270 }
271
Shutdowngrpc::testing::__anon8b8736020111::FlakyNetworkTest::ServerData272 void Shutdown() {
273 server_->Shutdown(grpc_timeout_milliseconds_to_deadline(0));
274 thread_->join();
275 }
276 };
277
WaitForChannelNotReady(Channel * channel,int timeout_seconds=5)278 bool WaitForChannelNotReady(Channel* channel, int timeout_seconds = 5) {
279 const gpr_timespec deadline =
280 grpc_timeout_seconds_to_deadline(timeout_seconds);
281 grpc_connectivity_state state;
282 while ((state = channel->GetState(false /* try_to_connect */)) ==
283 GRPC_CHANNEL_READY) {
284 if (!channel->WaitForStateChange(state, deadline)) return false;
285 }
286 return true;
287 }
288
WaitForChannelReady(Channel * channel,int timeout_seconds=5)289 bool WaitForChannelReady(Channel* channel, int timeout_seconds = 5) {
290 const gpr_timespec deadline =
291 grpc_timeout_seconds_to_deadline(timeout_seconds);
292 grpc_connectivity_state state;
293 while ((state = channel->GetState(true /* try_to_connect */)) !=
294 GRPC_CHANNEL_READY) {
295 if (!channel->WaitForStateChange(state, deadline)) return false;
296 }
297 return true;
298 }
299
300 private:
301 const std::string server_host_;
302 const std::string interface_;
303 const std::string ipv4_address_;
304 const std::string netmask_;
305 std::unique_ptr<grpc::testing::EchoTestService::Stub> stub_;
306 std::unique_ptr<ServerData> server_;
307 const int SERVER_PORT = 32750;
308 int port_;
309 };
310
CreateTestScenarios()311 std::vector<TestScenario> CreateTestScenarios() {
312 std::vector<TestScenario> scenarios;
313 std::vector<std::string> credentials_types;
314 std::vector<std::string> messages;
315
316 credentials_types.push_back(kInsecureCredentialsType);
317 auto sec_list = GetCredentialsProvider()->GetSecureCredentialsTypeList();
318 for (auto sec = sec_list.begin(); sec != sec_list.end(); sec++) {
319 credentials_types.push_back(*sec);
320 }
321
322 messages.push_back("");
323 for (size_t k = 1; k < GRPC_DEFAULT_MAX_RECV_MESSAGE_LENGTH / 1024; k *= 32) {
324 std::string big_msg;
325 for (size_t i = 0; i < k * 1024; ++i) {
326 char c = 'a' + (i % 26);
327 big_msg += c;
328 }
329 messages.push_back(big_msg);
330 }
331 for (auto cred = credentials_types.begin(); cred != credentials_types.end();
332 ++cred) {
333 for (auto msg = messages.begin(); msg != messages.end(); msg++) {
334 scenarios.emplace_back(*cred, *msg);
335 }
336 }
337
338 return scenarios;
339 }
340
// Instantiates every FlakyNetworkTest case once per scenario returned by
// CreateTestScenarios().
INSTANTIATE_TEST_SUITE_P(FlakyNetworkTest, FlakyNetworkTest,
                         ::testing::ValuesIn(CreateTestScenarios()));
343
344 // Network interface connected to server flaps
TEST_P(FlakyNetworkTest, NetworkTransition) {
  constexpr int kKeepAliveTimeMs = 1000;
  constexpr int kKeepAliveTimeoutMs = 1000;
  ChannelArguments channel_args;
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
  channel_args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);

  auto channel = BuildChannel("pick_first", channel_args);
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // Background traffic: one RPC per second until told to stop. Results are
  // intentionally ignored.
  std::atomic_bool stop_requested{false};
  std::thread sender([this, &stub, &stop_requested]() {
    while (!stop_requested.load()) {
      SendRpc(stub);
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
  });

  // Take the interface and DNS entry away; the channel must leave READY.
  NetworkDown();
  EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
  // Restore the interface first ...
  InterfaceUp();
  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
  // ... then the DNS entry, after which the channel must recover.
  DNSUp();
  EXPECT_TRUE(WaitForChannelReady(channel.get()));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  stop_requested.store(true);
  sender.join();
}
384
// Traffic to the server is blackholed temporarily with keepalives enabled
TEST_P(FlakyNetworkTest, ServerUnreachableWithKeepalive) {
  constexpr int kKeepAliveTimeMs = 1000;
  constexpr int kKeepAliveTimeoutMs = 1000;
  constexpr int kReconnectBackoffMs = 1000;
  ChannelArguments channel_args;
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
  channel_args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
  // Pin both reconnect backoff bounds to the same value.
  channel_args.SetInt(GRPC_ARG_MIN_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);
  channel_args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);

  VLOG(2) << "FlakyNetworkTest.ServerUnreachableWithKeepalive start";
  auto channel = BuildChannel("pick_first", channel_args);
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // Background traffic: one RPC per second until told to stop. Results are
  // intentionally ignored.
  std::atomic_bool stop_requested{false};
  std::thread sender([this, &stub, &stop_requested]() {
    while (!stop_requested.load()) {
      SendRpc(stub);
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
  });

  // Blackhole traffic, give the channel ten seconds to notice, and expect it
  // to have left READY.
  VLOG(2) << "Adding iptables rule to drop packets";
  DropPackets();
  std::this_thread::sleep_for(std::chrono::milliseconds(10000));
  EXPECT_TRUE(WaitForChannelNotReady(channel.get()));

  // Stop dropping packets; the channel must become READY again.
  RestoreNetwork();
  VLOG(2) << "Removed iptables rule to drop packets";
  EXPECT_TRUE(WaitForChannelReady(channel.get()));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  stop_requested.store(true);
  sender.join();
  VLOG(2) << "FlakyNetworkTest.ServerUnreachableWithKeepalive end";
}
432
433 //
// Traffic to the server is blackholed temporarily with keepalives disabled
TEST_P(FlakyNetworkTest, ServerUnreachableNoKeepalive) {
  auto channel = BuildChannel("pick_first", ChannelArguments());
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // Start dropping packets before any further RPC is issued.
  DropPackets();

  std::thread sender([this, &stub]() {
    // With a 500 ms deadline the RPC is expected to fail.
    const bool deadline_rpc_ok =
        SendRpc(stub, /*timeout_ms=*/500, /*wait_for_ready=*/true);
    EXPECT_FALSE(deadline_rpc_ok);
    // Without a deadline the RPC is expected to succeed eventually; it can
    // only finish once traffic flows again.
    const bool unbounded_rpc_ok =
        SendRpc(stub, /*timeout_ms=*/0, /*wait_for_ready=*/true);
    EXPECT_TRUE(unbounded_rpc_ok);
  });

  std::this_thread::sleep_for(std::chrono::milliseconds(2000));
  // Stop dropping packets so the pending RPC can complete.
  RestoreNetwork();

  // Wait for both RPCs to finish.
  sender.join();
}
459
460 // Send RPCs over a flaky network connection
TEST_P(FlakyNetworkTest, FlakyNetwork) {
  constexpr int kKeepAliveTimeMs = 1000;
  constexpr int kKeepAliveTimeoutMs = 1000;
  constexpr int kMessageCount = 100;
  ChannelArguments channel_args;
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
  channel_args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);

  auto channel = BuildChannel("pick_first", channel_args);
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // Degrade the link, push kMessageCount RPCs through it (individual results
  // are not checked), then remove the degradation again.
  FlakeNetwork();
  int remaining = kMessageCount;
  while (remaining-- > 0) {
    SendRpc(stub);
  }
  UnflakeNetwork();

  // The channel is expected to still be READY afterwards.
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
}
486
487 // Server is shutdown gracefully and restarted. Client keepalives are enabled
TEST_P(FlakyNetworkTest, ServerRestartKeepaliveEnabled) {
  constexpr int kKeepAliveTimeMs = 1000;
  constexpr int kKeepAliveTimeoutMs = 1000;
  ChannelArguments channel_args;
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
  channel_args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
  channel_args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);

  auto channel = BuildChannel("pick_first", channel_args);
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // Once the server is stopped the channel must leave READY and an RPC must
  // fail.
  StopServer();
  EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
  EXPECT_FALSE(SendRpc(stub));

  std::this_thread::sleep_for(std::chrono::milliseconds(1000));

  // After a restart the channel must become READY again.
  StartServer();
  EXPECT_TRUE(WaitForChannelReady(channel.get()));
  // Disabled check kept from the original test:
  // EXPECT_TRUE(SendRpc(stub));
}
516
// Server is shut down gracefully and restarted. Client keepalives are disabled
TEST_P(FlakyNetworkTest, ServerRestartKeepaliveDisabled) {
  // Default channel arguments: no keepalive settings are applied.
  auto channel = BuildChannel("pick_first", ChannelArguments());
  auto stub = BuildStub(channel);
  // A successful RPC implies the channel has reached READY.
  EXPECT_TRUE(SendRpc(stub));
  EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);

  // server sends GOAWAY when it's shutdown, so client attempts to reconnect
  StopServer();
  std::this_thread::sleep_for(std::chrono::milliseconds(1000));

  EXPECT_TRUE(WaitForChannelNotReady(channel.get()));

  std::this_thread::sleep_for(std::chrono::milliseconds(1000));

  // After a restart the channel must become READY again.
  StartServer();
  EXPECT_TRUE(WaitForChannelReady(channel.get()));
}
537
538 } // namespace
539 } // namespace testing
540 } // namespace grpc
541 #endif // GPR_LINUX
542
main(int argc,char ** argv)543 int main(int argc, char** argv) {
544 ::testing::InitGoogleTest(&argc, argv);
545 grpc::testing::TestEnvironment env(&argc, argv);
546 auto result = RUN_ALL_TESTS();
547 return result;
548 }
549