//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
#define GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H

#include <grpc/event_engine/event_engine.h>
#include <grpc/event_engine/slice.h>
#include <grpc/grpc.h>
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/port_platform.h>
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>
#include <utility>

#include "absl/base/thread_annotations.h"
#include "absl/container/inlined_vector.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
#include "src/core/lib/iomgr/resolved_address.h"
#include "src/core/load_balancing/backend_metric_data.h"
#include "src/core/load_balancing/subchannel_interface.h"
#include "src/core/resolver/endpoint_addresses.h"
#include "src/core/telemetry/metrics.h"
#include "src/core/util/debug_location.h"
#include "src/core/util/dual_ref_counted.h"
#include "src/core/util/orphanable.h"
#include "src/core/util/ref_counted.h"
#include "src/core/util/ref_counted_ptr.h"
#include "src/core/util/sync.h"
#include "src/core/util/work_serializer.h"

namespace grpc_core {

/// Interface for load balancing policies.
///
/// The following concepts are used here:
///
/// Channel: An abstraction that manages connections to backend servers
///   on behalf of a client application.  The application creates a channel
///   for a given server name and then sends calls (RPCs) on it, and the
///   channel figures out which backend server to send each call to.  A channel
///   contains a resolver, a load balancing policy (or a tree of LB policies),
///   and a set of one or more subchannels.
///
/// Subchannel: A subchannel represents a connection to one backend server.
///   The LB policy decides which subchannels to create, manages the
///   connectivity state of those subchannels, and decides which subchannel
///   to send any given call to.
///
/// Resolver: A plugin that takes a gRPC server URI and resolves it to a
///   list of one or more addresses and a service config, as described
///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
///   resolver.h for the resolver API.
///
/// Load Balancing (LB) Policy: A plugin that takes a list of addresses
///   from the resolver, maintains and manages a subchannel for each
///   backend address, and decides which subchannel to send each call on.
///   An LB policy has two parts:
///   - A LoadBalancingPolicy, which deals with the control plane work of
///     managing subchannels.
///   - A SubchannelPicker, which handles the data plane work of
///     determining which subchannel a given call should be sent on.

/// LoadBalancingPolicy API.
///
/// Note: All methods with a "Locked" suffix must be called from the
/// work_serializer passed to the constructor.
///
/// Any I/O done by the LB policy should be done under the pollset_set
/// returned by \a interested_parties().
94 // TODO(roth): Once we move to EventManager-based polling, remove the 95 // interested_parties() hooks from the API. 96 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> { 97 public: 98 /// Interface for accessing per-call state. 99 /// Implemented by the client channel and used by the SubchannelPicker. 100 class CallState { 101 public: 102 CallState() = default; 103 virtual ~CallState() = default; 104 105 /// Allocates memory associated with the call, which will be 106 /// automatically freed when the call is complete. 107 /// It is more efficient to use this than to allocate memory directly 108 /// for allocations that need to be made on a per-call basis. 109 virtual void* Alloc(size_t size) = 0; 110 }; 111 112 /// Interface for accessing metadata. 113 /// Implemented by the client channel and used by the SubchannelPicker. 114 class MetadataInterface { 115 public: 116 virtual ~MetadataInterface() = default; 117 118 virtual absl::optional<absl::string_view> Lookup( 119 absl::string_view key, std::string* buffer) const = 0; 120 }; 121 122 /// A list of metadata mutations to be returned along with a PickResult. 123 class MetadataMutations { 124 public: 125 /// Sets a key/value pair. If the key is already present, it will 126 /// be replaced with the new value. Set(absl::string_view key,absl::string_view value)127 void Set(absl::string_view key, absl::string_view value) { 128 Set(key, grpc_event_engine::experimental::Slice::FromCopiedString(value)); 129 } Set(absl::string_view key,grpc_event_engine::experimental::Slice value)130 void Set(absl::string_view key, 131 grpc_event_engine::experimental::Slice value) { 132 metadata_.push_back({key, std::move(value)}); 133 } 134 135 private: 136 friend class MetadataMutationHandler; 137 138 // Avoid allocation if up to 3 additions per LB pick. Most expected 139 // use cases should be no more than 2, so this gives us a bit of slack. 
140 // But it should be cheap to increase this value if we start seeing use 141 // cases with more than 3 additions. 142 absl::InlinedVector< 143 std::pair<absl::string_view, grpc_event_engine::experimental::Slice>, 3> 144 metadata_; 145 }; 146 147 /// Arguments used when picking a subchannel for a call. 148 struct PickArgs { 149 /// The path of the call. Indicates the RPC service and method name. 150 absl::string_view path; 151 /// Initial metadata associated with the picking call. 152 /// The LB policy may use the existing metadata to influence its routing 153 /// decision, and it may add new metadata elements to be sent with the 154 /// call to the chosen backend. 155 MetadataInterface* initial_metadata; 156 /// An interface for accessing call state. Can be used to allocate 157 /// memory associated with the call in an efficient way. 158 CallState* call_state; 159 }; 160 161 /// Interface for accessing backend metric data. 162 /// Implemented by the client channel and used by 163 /// SubchannelCallTrackerInterface. 164 class BackendMetricAccessor { 165 public: 166 virtual ~BackendMetricAccessor() = default; 167 168 /// Returns the backend metric data returned by the server for the call, 169 /// or null if no backend metric data was returned. 170 virtual const BackendMetricData* GetBackendMetricData() = 0; 171 }; 172 173 /// Interface for tracking subchannel calls. 174 /// Implemented by LB policy and used by the channel. 175 // TODO(roth): Before making this API public, consider whether we 176 // should just replace this with a CallTracer, similar to what Java does. 177 class SubchannelCallTrackerInterface { 178 public: 179 virtual ~SubchannelCallTrackerInterface() = default; 180 181 /// Called when a subchannel call is started after an LB pick. 182 virtual void Start() = 0; 183 184 /// Called when a subchannel call is completed. 185 /// The metadata may be modified by the implementation. 
However, the 186 /// implementation does not take ownership, so any data that needs to be 187 /// used after returning must be copied. 188 struct FinishArgs { 189 absl::string_view peer_address; 190 absl::Status status; 191 MetadataInterface* trailing_metadata; 192 BackendMetricAccessor* backend_metric_accessor; 193 }; 194 virtual void Finish(FinishArgs args) = 0; 195 }; 196 197 /// The result of picking a subchannel for a call. 198 struct PickResult { 199 /// A successful pick. 200 struct Complete { 201 /// The subchannel to be used for the call. Must be non-null. 202 RefCountedPtr<SubchannelInterface> subchannel; 203 204 /// Optionally set by the LB policy when it wishes to be notified 205 /// about the resulting subchannel call. 206 /// Note that if the pick is abandoned by the channel, this may never 207 /// be used. 208 std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker; 209 210 /// Metadata mutations to be applied to the call. 211 MetadataMutations metadata_mutations; 212 213 /// Authority override for the RPC. 214 /// Will be used only if the application has not explicitly set 215 /// the authority for the RPC. 216 grpc_event_engine::experimental::Slice authority_override; 217 218 explicit Complete( 219 RefCountedPtr<SubchannelInterface> sc, 220 std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr, 221 MetadataMutations md = MetadataMutations(), 222 grpc_event_engine::experimental::Slice authority = 223 grpc_event_engine::experimental::Slice()) subchannelPickResult::Complete224 : subchannel(std::move(sc)), 225 subchannel_call_tracker(std::move(tracker)), 226 metadata_mutations(std::move(md)), 227 authority_override(std::move(authority)) {} 228 }; 229 230 /// Pick cannot be completed until something changes on the control 231 /// plane. The client channel will queue the pick and try again the 232 /// next time the picker is updated. 233 struct Queue {}; 234 235 /// Pick failed. 
If the call is wait_for_ready, the client channel 236 /// will wait for the next picker and try again; otherwise, it 237 /// will immediately fail the call with the status indicated (although 238 /// the call may be retried if the client channel is configured to do so). 239 struct Fail { 240 absl::Status status; 241 FailPickResult::Fail242 explicit Fail(absl::Status s) : status(s) {} 243 }; 244 245 /// Pick will be dropped with the status specified. 246 /// Unlike FailPick, the call will be dropped even if it is 247 /// wait_for_ready, and retries (if configured) will be inhibited. 248 struct Drop { 249 absl::Status status; 250 DropPickResult::Drop251 explicit Drop(absl::Status s) : status(s) {} 252 }; 253 254 // A pick result must be one of these types. 255 // Default to Queue, just to allow default construction. 256 absl::variant<Complete, Queue, Fail, Drop> result = Queue(); 257 258 PickResult() = default; 259 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult260 PickResult(Complete complete) : result(std::move(complete)) {} 261 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult262 PickResult(Queue queue) : result(queue) {} 263 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult264 PickResult(Fail fail) : result(std::move(fail)) {} 265 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult266 PickResult(Drop drop) : result(std::move(drop)) {} 267 }; 268 269 /// A subchannel picker is the object used to pick the subchannel to 270 /// use for a given call. This is implemented by the LB policy and 271 /// used by the client channel to perform picks. 
272 /// 273 /// Pickers are intended to encapsulate all of the state and logic 274 /// needed on the data plane (i.e., to actually process picks for 275 /// individual calls sent on the channel) while excluding all of the 276 /// state and logic needed on the control plane (i.e., resolver 277 /// updates, connectivity state notifications, etc); the latter should 278 /// live in the LB policy object itself. 279 class SubchannelPicker : public DualRefCounted<SubchannelPicker> { 280 public: 281 SubchannelPicker(); 282 283 virtual PickResult Pick(PickArgs args) = 0; 284 285 protected: Orphaned()286 void Orphaned() override {} 287 }; 288 289 /// A proxy object implemented by the client channel and used by the 290 /// LB policy to communicate with the channel. 291 class ChannelControlHelper { 292 public: 293 ChannelControlHelper() = default; 294 virtual ~ChannelControlHelper() = default; 295 296 /// Creates a new subchannel with the specified channel args. 297 /// The args and per_address_args will be merged by the channel. 298 virtual RefCountedPtr<SubchannelInterface> CreateSubchannel( 299 const grpc_resolved_address& address, 300 const ChannelArgs& per_address_args, const ChannelArgs& args) = 0; 301 302 /// Sets the connectivity state and returns a new picker to be used 303 /// by the client channel. 304 virtual void UpdateState(grpc_connectivity_state state, 305 const absl::Status& status, 306 RefCountedPtr<SubchannelPicker> picker) = 0; 307 308 /// Requests that the resolver re-resolve. 309 virtual void RequestReresolution() = 0; 310 311 /// Returns the channel target. 312 virtual absl::string_view GetTarget() = 0; 313 314 /// Returns the channel authority. 315 virtual absl::string_view GetAuthority() = 0; 316 317 /// Returns the channel credentials from the parent channel. This can 318 /// be used to create a control-plane channel inside an LB policy. 
319 virtual RefCountedPtr<grpc_channel_credentials> GetChannelCredentials() = 0; 320 321 /// Returns the UNSAFE ChannelCredentials used to construct the channel, 322 /// including bearer tokens. LB policies should generally have no use for 323 /// these credentials, and use of them is heavily discouraged. These must 324 /// be used VERY carefully to avoid sending bearer tokens to untrusted 325 /// servers, as the server could then impersonate the client. Generally, 326 /// it is safe to use these credentials only when communicating with the 327 /// backends. 328 virtual RefCountedPtr<grpc_channel_credentials> 329 GetUnsafeChannelCredentials() = 0; 330 331 /// Returns the EventEngine to use for timers and async work. 332 virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0; 333 334 /// Returns the stats plugin group for reporting metrics. 335 virtual GlobalStatsPluginRegistry::StatsPluginGroup& 336 GetStatsPluginGroup() = 0; 337 338 /// Adds a trace message associated with the channel. 339 enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR }; 340 virtual void AddTraceEvent(TraceSeverity severity, 341 absl::string_view message) = 0; 342 }; 343 344 class DelegatingChannelControlHelper; 345 346 template <typename ParentPolicy> 347 class ParentOwningDelegatingChannelControlHelper; 348 349 /// Interface for configuration data used by an LB policy implementation. 350 /// Individual implementations will create a subclass that adds methods to 351 /// return the parameters they need. 352 class Config : public RefCounted<Config> { 353 public: 354 ~Config() override = default; 355 356 // Returns the load balancing policy name 357 virtual absl::string_view name() const = 0; 358 }; 359 360 /// Data passed to the UpdateLocked() method when new addresses and 361 /// config are available. 362 struct UpdateArgs { 363 /// A list of endpoints, each with one or more address, or an error 364 /// indicating a failure to obtain the list of addresses. 
365 absl::StatusOr<std::shared_ptr<EndpointAddressesIterator>> addresses; 366 /// The LB policy config. 367 RefCountedPtr<Config> config; 368 /// A human-readable note providing context about the name resolution that 369 /// provided this update. LB policies may wish to include this message 370 /// in RPC failure status messages. For example, if the update has an 371 /// empty list of addresses, this message might say "no DNS entries 372 /// found for <name>". 373 std::string resolution_note; 374 375 // TODO(roth): Before making this a public API, find a better 376 // abstraction for representing channel args. 377 ChannelArgs args; 378 }; 379 380 /// Args used to instantiate an LB policy. 381 struct Args { 382 /// The work_serializer under which all LB policy calls will be run. 383 std::shared_ptr<WorkSerializer> work_serializer; 384 /// Channel control helper. 385 /// Note: LB policies MUST NOT call any method on the helper from 386 /// their constructor. 387 std::unique_ptr<ChannelControlHelper> channel_control_helper; 388 /// Channel args. 389 // TODO(roth): Find a better channel args representation for this API. 390 ChannelArgs args; 391 }; 392 393 explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1); 394 ~LoadBalancingPolicy() override; 395 396 // Not copyable nor movable. 397 LoadBalancingPolicy(const LoadBalancingPolicy&) = delete; 398 LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete; 399 400 /// Returns the name of the LB policy. 401 virtual absl::string_view name() const = 0; 402 403 /// Updates the policy with new data from the resolver. Will be invoked 404 /// immediately after LB policy is constructed, and then again whenever 405 /// the resolver returns a new result. The returned status indicates 406 /// whether the LB policy accepted the update; if non-OK, informs 407 /// polling-based resolvers that they should go into backoff delay and 408 /// eventually reattempt the resolution. 
409 /// 410 /// The first time that UpdateLocked() is called, the LB policy will 411 /// generally not be able to determine the appropriate connectivity 412 /// state by the time UpdateLocked() returns (e.g., it will need to 413 /// wait for connectivity state notifications from each subchannel, 414 /// which will be delivered asynchronously). In this case, the LB 415 /// policy should not call the helper's UpdateState() method until it 416 /// does have a clear picture of the connectivity state (e.g., it 417 /// should wait for all subchannels to report connectivity state 418 /// before calling the helper's UpdateState() method), although it is 419 /// expected to do so within some short period of time. The parent of 420 /// the LB policy will assume that the policy's initial state is 421 /// CONNECTING and that picks should be queued. 422 virtual absl::Status UpdateLocked(UpdateArgs) = 0; // NOLINT 423 424 /// Tries to enter a READY connectivity state. 425 /// This is a no-op by default, since most LB policies never go into 426 /// IDLE state. ExitIdleLocked()427 virtual void ExitIdleLocked() {} 428 429 /// Resets connection backoff. 430 virtual void ResetBackoffLocked() = 0; 431 interested_parties()432 grpc_pollset_set* interested_parties() const { return interested_parties_; } 433 434 // Note: This must be invoked while holding the work_serializer. 435 void Orphan() override; 436 437 // A picker that returns PickResult::Queue for all picks. 438 // Also calls the parent LB policy's ExitIdleLocked() method when the 439 // first pick is seen. 
440 class QueuePicker final : public SubchannelPicker { 441 public: QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)442 explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent) 443 : parent_(std::move(parent)) {} 444 ~QueuePicker()445 ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); } 446 447 PickResult Pick(PickArgs args) override; 448 449 private: 450 Mutex mu_; 451 RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_); 452 }; 453 454 // A picker that returns PickResult::Fail for all picks. 455 class TransientFailurePicker final : public SubchannelPicker { 456 public: TransientFailurePicker(absl::Status status)457 explicit TransientFailurePicker(absl::Status status) : status_(status) {} 458 Pick(PickArgs)459 PickResult Pick(PickArgs /*args*/) override { 460 return PickResult::Fail(status_); 461 } 462 463 private: 464 absl::Status status_; 465 }; 466 467 // A picker that returns PickResult::Drop for all picks. 468 class DropPicker final : public SubchannelPicker { 469 public: DropPicker(absl::Status status)470 explicit DropPicker(absl::Status status) : status_(status) {} 471 Pick(PickArgs)472 PickResult Pick(PickArgs /*args*/) override { 473 return PickResult::Drop(status_); 474 } 475 476 private: 477 absl::Status status_; 478 }; 479 480 protected: work_serializer()481 std::shared_ptr<WorkSerializer> work_serializer() const { 482 return work_serializer_; 483 } 484 channel_args()485 const ChannelArgs& channel_args() const { return channel_args_; } 486 487 // Note: LB policies MUST NOT call any method on the helper from their 488 // constructor. channel_control_helper()489 ChannelControlHelper* channel_control_helper() const { 490 return channel_control_helper_.get(); 491 } 492 493 /// Shuts down the policy. 494 virtual void ShutdownLocked() = 0; 495 496 private: 497 /// Work Serializer under which LB policy actions take place. 
498 std::shared_ptr<WorkSerializer> work_serializer_; 499 /// Owned pointer to interested parties in load balancing decisions. 500 grpc_pollset_set* interested_parties_; 501 /// Channel control helper. 502 std::unique_ptr<ChannelControlHelper> channel_control_helper_; 503 /// Channel args passed in. 504 // TODO(roth): Rework Args so that we don't need to capture channel args here. 505 ChannelArgs channel_args_; 506 }; 507 508 template <> 509 struct ArenaContextType<LoadBalancingPolicy::SubchannelCallTrackerInterface> { 510 static void Destroy(LoadBalancingPolicy::SubchannelCallTrackerInterface*) {} 511 }; 512 513 } // namespace grpc_core 514 515 #endif // GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H 516