1 // 2 // 3 // Copyright 2018 gRPC authors. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 // 18 19 #ifndef GRPC_SRC_CORE_LIB_CHANNEL_CHANNELZ_H 20 #define GRPC_SRC_CORE_LIB_CHANNEL_CHANNELZ_H 21 22 #include <grpc/support/port_platform.h> 23 24 #include <stddef.h> 25 26 #include <atomic> 27 #include <cstdint> 28 #include <map> 29 #include <set> 30 #include <string> 31 #include <utility> 32 33 #include "absl/strings/string_view.h" 34 #include "absl/types/optional.h" 35 36 #include <grpc/grpc.h> 37 #include <grpc/impl/connectivity_state.h> 38 #include <grpc/slice.h> 39 40 #include "src/core/lib/channel/channel_trace.h" 41 #include "src/core/lib/gpr/time_precise.h" 42 #include "src/core/lib/gpr/useful.h" 43 #include "src/core/lib/gprpp/per_cpu.h" 44 #include "src/core/lib/gprpp/ref_counted.h" 45 #include "src/core/lib/gprpp/ref_counted_ptr.h" 46 #include "src/core/lib/gprpp/sync.h" 47 #include "src/core/lib/json/json.h" 48 49 // Channel arg key for channelz node. 50 #define GRPC_ARG_CHANNELZ_CHANNEL_NODE "grpc.internal.channelz_channel_node" 51 52 // Channel arg key for indicating an internal channel. 53 #define GRPC_ARG_CHANNELZ_IS_INTERNAL_CHANNEL \ 54 "grpc.channelz_is_internal_channel" 55 56 /// This is the default value for whether or not to enable channelz. If 57 /// GRPC_ARG_ENABLE_CHANNELZ is set, it will override this default value. 58 #define GRPC_ENABLE_CHANNELZ_DEFAULT true 59 60 /// This is the default value for the maximum amount of memory used by trace 61 /// events per channel trace node. If 62 /// GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE is set, it will override 63 /// this default value. 64 #define GRPC_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE_DEFAULT (1024 * 4) 65 66 namespace grpc_core { 67 68 namespace channelz { 69 70 class SocketNode; 71 class ListenSocketNode; 72 73 namespace testing { 74 class CallCountingHelperPeer; 75 class ChannelNodePeer; 76 } // namespace testing 77 78 // base class for all channelz entities 79 class BaseNode : public RefCounted<BaseNode> { 80 public: 81 // There are only four high level channelz entities. However, to support 82 // GetTopChannelsRequest, we split the Channel entity into two different 83 // types. All children of BaseNode must be one of these types. 84 enum class EntityType { 85 kTopLevelChannel, 86 kInternalChannel, 87 kSubchannel, 88 kServer, 89 kSocket, 90 }; 91 92 protected: 93 BaseNode(EntityType type, std::string name); 94 95 public: 96 ~BaseNode() override; 97 98 // All children must implement this function. 99 virtual Json RenderJson() = 0; 100 101 // Renders the json and returns allocated string that must be freed by the 102 // caller. 103 std::string RenderJsonString(); 104 type()105 EntityType type() const { return type_; } uuid()106 intptr_t uuid() const { return uuid_; } name()107 const std::string& name() const { return name_; } 108 109 private: 110 // to allow the ChannelzRegistry to set uuid_ under its lock. 111 friend class ChannelzRegistry; 112 const EntityType type_; 113 intptr_t uuid_; 114 std::string name_; 115 }; 116 117 // This class is a helper class for channelz entities that deal with Channels, 118 // Subchannels, and Servers, since those have similar proto definitions. 119 // This class has the ability to: 120 // - track calls_{started,succeeded,failed} 121 // - track last_call_started_timestamp 122 // - perform rendering of the above items 123 class CallCountingHelper final { 124 public: 125 void RecordCallStarted(); 126 void RecordCallFailed(); 127 void RecordCallSucceeded(); 128 129 // Common rendering of the call count data and last_call_started_timestamp. 130 void PopulateCallCounts(Json::Object* json); 131 132 private: 133 // testing peer friend. 134 friend class testing::CallCountingHelperPeer; 135 136 std::atomic<int64_t> calls_started_{0}; 137 std::atomic<int64_t> calls_succeeded_{0}; 138 std::atomic<int64_t> calls_failed_{0}; 139 std::atomic<gpr_cycle_counter> last_call_started_cycle_{0}; 140 }; 141 142 class PerCpuCallCountingHelper final { 143 public: 144 void RecordCallStarted(); 145 void RecordCallFailed(); 146 void RecordCallSucceeded(); 147 148 // Common rendering of the call count data and last_call_started_timestamp. 149 void PopulateCallCounts(Json::Object* json); 150 151 private: 152 // testing peer friend. 153 friend class testing::CallCountingHelperPeer; 154 155 // We want to ensure that this per-cpu data structure lands on different 156 // cachelines per cpu. 157 // With C++17 we can do so explicitly with an `alignas` specifier. 158 // Prior versions we can at best approximate it by padding the structure. 159 // It'll probably work out ok, but it's not guaranteed across allocators. 160 // (in the bad case where this gets split across cachelines we'll just have 161 // two cpus fighting over the same cacheline with a slight performance 162 // degregation). 163 // TODO(ctiller): When we move to C++17 delete the duplicate definition. 164 #if __cplusplus >= 201703L 165 struct alignas(GPR_CACHELINE_SIZE) PerCpuData { 166 std::atomic<int64_t> calls_started{0}; 167 std::atomic<int64_t> calls_succeeded{0}; 168 std::atomic<int64_t> calls_failed{0}; 169 std::atomic<gpr_cycle_counter> last_call_started_cycle{0}; 170 }; 171 #else 172 struct PerCpuDataHeader { 173 std::atomic<int64_t> calls_started{0}; 174 std::atomic<int64_t> calls_succeeded{0}; 175 std::atomic<int64_t> calls_failed{0}; 176 std::atomic<gpr_cycle_counter> last_call_started_cycle{0}; 177 }; 178 struct PerCpuData : public PerCpuDataHeader { 179 uint8_t padding[GPR_CACHELINE_SIZE - sizeof(PerCpuDataHeader)]; 180 }; 181 #endif 182 PerCpu<PerCpuData> per_cpu_data_{PerCpuOptions().SetCpusPerShard(4)}; 183 }; 184 185 // Handles channelz bookkeeping for channels 186 class ChannelNode final : public BaseNode { 187 public: 188 ChannelNode(std::string target, size_t channel_tracer_max_nodes, 189 bool is_internal_channel); 190 ChannelArgName()191 static absl::string_view ChannelArgName() { 192 return GRPC_ARG_CHANNELZ_CHANNEL_NODE; 193 } ChannelArgsCompare(const ChannelNode * a,const ChannelNode * b)194 static int ChannelArgsCompare(const ChannelNode* a, const ChannelNode* b) { 195 return QsortCompare(a, b); 196 } 197 198 // Returns the string description of the given connectivity state. 199 static const char* GetChannelConnectivityStateChangeString( 200 grpc_connectivity_state state); 201 202 Json RenderJson() override; 203 204 // proxy methods to composed classes. AddTraceEvent(ChannelTrace::Severity severity,const grpc_slice & data)205 void AddTraceEvent(ChannelTrace::Severity severity, const grpc_slice& data) { 206 trace_.AddTraceEvent(severity, data); 207 } AddTraceEventWithReference(ChannelTrace::Severity severity,const grpc_slice & data,RefCountedPtr<BaseNode> referenced_channel)208 void AddTraceEventWithReference(ChannelTrace::Severity severity, 209 const grpc_slice& data, 210 RefCountedPtr<BaseNode> referenced_channel) { 211 trace_.AddTraceEventWithReference(severity, data, 212 std::move(referenced_channel)); 213 } RecordCallStarted()214 void RecordCallStarted() { call_counter_.RecordCallStarted(); } RecordCallFailed()215 void RecordCallFailed() { call_counter_.RecordCallFailed(); } RecordCallSucceeded()216 void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); } 217 218 void SetConnectivityState(grpc_connectivity_state state); 219 220 // TODO(roth): take in a RefCountedPtr to the child channel so we can retrieve 221 // the human-readable name. 222 void AddChildChannel(intptr_t child_uuid); 223 void RemoveChildChannel(intptr_t child_uuid); 224 225 // TODO(roth): take in a RefCountedPtr to the child subchannel so we can 226 // retrieve the human-readable name. 227 void AddChildSubchannel(intptr_t child_uuid); 228 void RemoveChildSubchannel(intptr_t child_uuid); 229 230 private: 231 // Allows the channel trace test to access trace_. 232 friend class testing::ChannelNodePeer; 233 234 void PopulateChildRefs(Json::Object* json); 235 236 std::string target_; 237 CallCountingHelper call_counter_; 238 ChannelTrace trace_; 239 240 // Least significant bit indicates whether the value is set. Remaining 241 // bits are a grpc_connectivity_state value. 242 std::atomic<int> connectivity_state_{0}; 243 244 Mutex child_mu_; // Guards sets below. 245 std::set<intptr_t> child_channels_; 246 std::set<intptr_t> child_subchannels_; 247 }; 248 249 // Handles channelz bookkeeping for servers 250 class ServerNode final : public BaseNode { 251 public: 252 explicit ServerNode(size_t channel_tracer_max_nodes); 253 254 ~ServerNode() override; 255 256 Json RenderJson() override; 257 258 std::string RenderServerSockets(intptr_t start_socket_id, 259 intptr_t max_results); 260 261 void AddChildSocket(RefCountedPtr<SocketNode> node); 262 263 void RemoveChildSocket(intptr_t child_uuid); 264 265 void AddChildListenSocket(RefCountedPtr<ListenSocketNode> node); 266 267 void RemoveChildListenSocket(intptr_t child_uuid); 268 269 // proxy methods to composed classes. AddTraceEvent(ChannelTrace::Severity severity,const grpc_slice & data)270 void AddTraceEvent(ChannelTrace::Severity severity, const grpc_slice& data) { 271 trace_.AddTraceEvent(severity, data); 272 } AddTraceEventWithReference(ChannelTrace::Severity severity,const grpc_slice & data,RefCountedPtr<BaseNode> referenced_channel)273 void AddTraceEventWithReference(ChannelTrace::Severity severity, 274 const grpc_slice& data, 275 RefCountedPtr<BaseNode> referenced_channel) { 276 trace_.AddTraceEventWithReference(severity, data, 277 std::move(referenced_channel)); 278 } RecordCallStarted()279 void RecordCallStarted() { call_counter_.RecordCallStarted(); } RecordCallFailed()280 void RecordCallFailed() { call_counter_.RecordCallFailed(); } RecordCallSucceeded()281 void RecordCallSucceeded() { call_counter_.RecordCallSucceeded(); } 282 283 private: 284 PerCpuCallCountingHelper call_counter_; 285 ChannelTrace trace_; 286 Mutex child_mu_; // Guards child maps below. 287 std::map<intptr_t, RefCountedPtr<SocketNode>> child_sockets_; 288 std::map<intptr_t, RefCountedPtr<ListenSocketNode>> child_listen_sockets_; 289 }; 290 291 #define GRPC_ARG_CHANNELZ_SECURITY "grpc.internal.channelz_security" 292 293 // Handles channelz bookkeeping for sockets 294 class SocketNode final : public BaseNode { 295 public: 296 struct Security : public RefCounted<Security> { 297 struct Tls { 298 // This is a workaround for https://bugs.llvm.org/show_bug.cgi?id=50346 TlsSecurity::Tls299 Tls() {} 300 301 enum class NameType { kUnset = 0, kStandardName = 1, kOtherName = 2 }; 302 NameType type = NameType::kUnset; 303 // Holds the value of standard_name or other_names if type is not kUnset. 304 std::string name; 305 std::string local_certificate; 306 std::string remote_certificate; 307 308 Json RenderJson(); 309 }; 310 enum class ModelType { kUnset = 0, kTls = 1, kOther = 2 }; 311 ModelType type = ModelType::kUnset; 312 absl::optional<Tls> tls; 313 absl::optional<Json> other; 314 315 Json RenderJson(); 316 ChannelArgNameSecurity317 static absl::string_view ChannelArgName() { 318 return GRPC_ARG_CHANNELZ_SECURITY; 319 } 320 ChannelArgsCompareSecurity321 static int ChannelArgsCompare(const Security* a, const Security* b) { 322 return QsortCompare(a, b); 323 } 324 325 grpc_arg MakeChannelArg() const; 326 327 static RefCountedPtr<Security> GetFromChannelArgs( 328 const grpc_channel_args* args); 329 }; 330 331 SocketNode(std::string local, std::string remote, std::string name, 332 RefCountedPtr<Security> security); ~SocketNode()333 ~SocketNode() override {} 334 335 Json RenderJson() override; 336 337 void RecordStreamStartedFromLocal(); 338 void RecordStreamStartedFromRemote(); RecordStreamSucceeded()339 void RecordStreamSucceeded() { 340 streams_succeeded_.fetch_add(1, std::memory_order_relaxed); 341 } RecordStreamFailed()342 void RecordStreamFailed() { 343 streams_failed_.fetch_add(1, std::memory_order_relaxed); 344 } 345 void RecordMessagesSent(uint32_t num_sent); 346 void RecordMessageReceived(); RecordKeepaliveSent()347 void RecordKeepaliveSent() { 348 keepalives_sent_.fetch_add(1, std::memory_order_relaxed); 349 } 350 remote()351 const std::string& remote() { return remote_; } 352 353 private: 354 std::atomic<int64_t> streams_started_{0}; 355 std::atomic<int64_t> streams_succeeded_{0}; 356 std::atomic<int64_t> streams_failed_{0}; 357 std::atomic<int64_t> messages_sent_{0}; 358 std::atomic<int64_t> messages_received_{0}; 359 std::atomic<int64_t> keepalives_sent_{0}; 360 std::atomic<gpr_cycle_counter> last_local_stream_created_cycle_{0}; 361 std::atomic<gpr_cycle_counter> last_remote_stream_created_cycle_{0}; 362 std::atomic<gpr_cycle_counter> last_message_sent_cycle_{0}; 363 std::atomic<gpr_cycle_counter> last_message_received_cycle_{0}; 364 std::string local_; 365 std::string remote_; 366 RefCountedPtr<Security> const security_; 367 }; 368 369 // Handles channelz bookkeeping for listen sockets 370 class ListenSocketNode final : public BaseNode { 371 public: 372 ListenSocketNode(std::string local_addr, std::string name); ~ListenSocketNode()373 ~ListenSocketNode() override {} 374 375 Json RenderJson() override; 376 377 private: 378 std::string local_addr_; 379 }; 380 381 } // namespace channelz 382 } // namespace grpc_core 383 384 #endif // GRPC_SRC_CORE_LIB_CHANNEL_CHANNELZ_H 385