• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #ifndef GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
18 #define GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
19 
20 #include <grpc/event_engine/event_engine.h>
21 #include <grpc/event_engine/slice.h>
22 #include <grpc/grpc.h>
23 #include <grpc/impl/connectivity_state.h>
24 #include <grpc/support/port_platform.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 
28 #include <memory>
29 #include <string>
30 #include <utility>
31 
32 #include "absl/base/thread_annotations.h"
33 #include "absl/container/inlined_vector.h"
34 #include "absl/status/status.h"
35 #include "absl/status/statusor.h"
36 #include "absl/strings/string_view.h"
37 #include "absl/types/optional.h"
38 #include "absl/types/variant.h"
39 #include "src/core/lib/channel/channel_args.h"
40 #include "src/core/lib/debug/trace.h"
41 #include "src/core/lib/iomgr/iomgr_fwd.h"
42 #include "src/core/lib/iomgr/resolved_address.h"
43 #include "src/core/load_balancing/backend_metric_data.h"
44 #include "src/core/load_balancing/subchannel_interface.h"
45 #include "src/core/resolver/endpoint_addresses.h"
46 #include "src/core/telemetry/metrics.h"
47 #include "src/core/util/debug_location.h"
48 #include "src/core/util/dual_ref_counted.h"
49 #include "src/core/util/orphanable.h"
50 #include "src/core/util/ref_counted.h"
51 #include "src/core/util/ref_counted_ptr.h"
52 #include "src/core/util/sync.h"
53 #include "src/core/util/work_serializer.h"
54 
55 namespace grpc_core {
56 
57 /// Interface for load balancing policies.
58 ///
59 /// The following concepts are used here:
60 ///
61 /// Channel: An abstraction that manages connections to backend servers
62 ///   on behalf of a client application.  The application creates a channel
63 ///   for a given server name and then sends calls (RPCs) on it, and the
64 ///   channel figures out which backend server to send each call to.  A channel
65 ///   contains a resolver, a load balancing policy (or a tree of LB policies),
66 ///   and a set of one or more subchannels.
67 ///
68 /// Subchannel: A subchannel represents a connection to one backend server.
69 ///   The LB policy decides which subchannels to create, manages the
70 ///   connectivity state of those subchannels, and decides which subchannel
71 ///   to send any given call to.
72 ///
73 /// Resolver: A plugin that takes a gRPC server URI and resolves it to a
74 ///   list of one or more addresses and a service config, as described
75 ///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
76 ///   resolver.h for the resolver API.
77 ///
78 /// Load Balancing (LB) Policy: A plugin that takes a list of addresses
79 ///   from the resolver, maintains and manages a subchannel for each
80 ///   backend address, and decides which subchannel to send each call on.
81 ///   An LB policy has two parts:
82 ///   - A LoadBalancingPolicy, which deals with the control plane work of
83 ///     managing subchannels.
84 ///   - A SubchannelPicker, which handles the data plane work of
85 ///     determining which subchannel a given call should be sent on.
86 
87 /// LoadBalancingPolicy API.
88 ///
89 /// Note: All methods with a "Locked" suffix must be called from the
90 /// work_serializer passed to the constructor.
91 ///
92 /// Any I/O done by the LB policy should be done under the pollset_set
93 /// returned by \a interested_parties().
94 // TODO(roth): Once we move to EventManager-based polling, remove the
95 // interested_parties() hooks from the API.
96 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> {
97  public:
98   /// Interface for accessing per-call state.
99   /// Implemented by the client channel and used by the SubchannelPicker.
100   class CallState {
101    public:
102     CallState() = default;
103     virtual ~CallState() = default;
104 
105     /// Allocates memory associated with the call, which will be
106     /// automatically freed when the call is complete.
107     /// It is more efficient to use this than to allocate memory directly
108     /// for allocations that need to be made on a per-call basis.
109     virtual void* Alloc(size_t size) = 0;
110   };
111 
112   /// Interface for accessing metadata.
113   /// Implemented by the client channel and used by the SubchannelPicker.
114   class MetadataInterface {
115    public:
116     virtual ~MetadataInterface() = default;
117 
118     virtual absl::optional<absl::string_view> Lookup(
119         absl::string_view key, std::string* buffer) const = 0;
120   };
121 
122   /// A list of metadata mutations to be returned along with a PickResult.
123   class MetadataMutations {
124    public:
125     /// Sets a key/value pair.  If the key is already present, it will
126     /// be replaced with the new value.
Set(absl::string_view key,absl::string_view value)127     void Set(absl::string_view key, absl::string_view value) {
128       Set(key, grpc_event_engine::experimental::Slice::FromCopiedString(value));
129     }
Set(absl::string_view key,grpc_event_engine::experimental::Slice value)130     void Set(absl::string_view key,
131              grpc_event_engine::experimental::Slice value) {
132       metadata_.push_back({key, std::move(value)});
133     }
134 
135    private:
136     friend class MetadataMutationHandler;
137 
138     // Avoid allocation if up to 3 additions per LB pick.  Most expected
139     // use cases should be no more than 2, so this gives us a bit of slack.
140     // But it should be cheap to increase this value if we start seeing use
141     // cases with more than 3 additions.
142     absl::InlinedVector<
143         std::pair<absl::string_view, grpc_event_engine::experimental::Slice>, 3>
144         metadata_;
145   };
146 
147   /// Arguments used when picking a subchannel for a call.
148   struct PickArgs {
149     /// The path of the call.  Indicates the RPC service and method name.
150     absl::string_view path;
151     /// Initial metadata associated with the picking call.
152     /// The LB policy may use the existing metadata to influence its routing
153     /// decision, and it may add new metadata elements to be sent with the
154     /// call to the chosen backend.
155     MetadataInterface* initial_metadata;
156     /// An interface for accessing call state.  Can be used to allocate
157     /// memory associated with the call in an efficient way.
158     CallState* call_state;
159   };
160 
161   /// Interface for accessing backend metric data.
162   /// Implemented by the client channel and used by
163   /// SubchannelCallTrackerInterface.
164   class BackendMetricAccessor {
165    public:
166     virtual ~BackendMetricAccessor() = default;
167 
168     /// Returns the backend metric data returned by the server for the call,
169     /// or null if no backend metric data was returned.
170     virtual const BackendMetricData* GetBackendMetricData() = 0;
171   };
172 
173   /// Interface for tracking subchannel calls.
174   /// Implemented by LB policy and used by the channel.
175   // TODO(roth): Before making this API public, consider whether we
176   // should just replace this with a CallTracer, similar to what Java does.
177   class SubchannelCallTrackerInterface {
178    public:
179     virtual ~SubchannelCallTrackerInterface() = default;
180 
181     /// Called when a subchannel call is started after an LB pick.
182     virtual void Start() = 0;
183 
184     /// Called when a subchannel call is completed.
185     /// The metadata may be modified by the implementation.  However, the
186     /// implementation does not take ownership, so any data that needs to be
187     /// used after returning must be copied.
188     struct FinishArgs {
189       absl::string_view peer_address;
190       absl::Status status;
191       MetadataInterface* trailing_metadata;
192       BackendMetricAccessor* backend_metric_accessor;
193     };
194     virtual void Finish(FinishArgs args) = 0;
195   };
196 
197   /// The result of picking a subchannel for a call.
198   struct PickResult {
199     /// A successful pick.
200     struct Complete {
201       /// The subchannel to be used for the call.  Must be non-null.
202       RefCountedPtr<SubchannelInterface> subchannel;
203 
204       /// Optionally set by the LB policy when it wishes to be notified
205       /// about the resulting subchannel call.
206       /// Note that if the pick is abandoned by the channel, this may never
207       /// be used.
208       std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker;
209 
210       /// Metadata mutations to be applied to the call.
211       MetadataMutations metadata_mutations;
212 
213       /// Authority override for the RPC.
214       /// Will be used only if the application has not explicitly set
215       /// the authority for the RPC.
216       grpc_event_engine::experimental::Slice authority_override;
217 
218       explicit Complete(
219           RefCountedPtr<SubchannelInterface> sc,
220           std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr,
221           MetadataMutations md = MetadataMutations(),
222           grpc_event_engine::experimental::Slice authority =
223               grpc_event_engine::experimental::Slice())
subchannelPickResult::Complete224           : subchannel(std::move(sc)),
225             subchannel_call_tracker(std::move(tracker)),
226             metadata_mutations(std::move(md)),
227             authority_override(std::move(authority)) {}
228     };
229 
230     /// Pick cannot be completed until something changes on the control
231     /// plane.  The client channel will queue the pick and try again the
232     /// next time the picker is updated.
233     struct Queue {};
234 
235     /// Pick failed.  If the call is wait_for_ready, the client channel
236     /// will wait for the next picker and try again; otherwise, it
237     /// will immediately fail the call with the status indicated (although
238     /// the call may be retried if the client channel is configured to do so).
239     struct Fail {
240       absl::Status status;
241 
FailPickResult::Fail242       explicit Fail(absl::Status s) : status(s) {}
243     };
244 
245     /// Pick will be dropped with the status specified.
246     /// Unlike FailPick, the call will be dropped even if it is
247     /// wait_for_ready, and retries (if configured) will be inhibited.
248     struct Drop {
249       absl::Status status;
250 
DropPickResult::Drop251       explicit Drop(absl::Status s) : status(s) {}
252     };
253 
254     // A pick result must be one of these types.
255     // Default to Queue, just to allow default construction.
256     absl::variant<Complete, Queue, Fail, Drop> result = Queue();
257 
258     PickResult() = default;
259     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult260     PickResult(Complete complete) : result(std::move(complete)) {}
261     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult262     PickResult(Queue queue) : result(queue) {}
263     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult264     PickResult(Fail fail) : result(std::move(fail)) {}
265     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult266     PickResult(Drop drop) : result(std::move(drop)) {}
267   };
268 
269   /// A subchannel picker is the object used to pick the subchannel to
270   /// use for a given call.  This is implemented by the LB policy and
271   /// used by the client channel to perform picks.
272   ///
273   /// Pickers are intended to encapsulate all of the state and logic
274   /// needed on the data plane (i.e., to actually process picks for
275   /// individual calls sent on the channel) while excluding all of the
276   /// state and logic needed on the control plane (i.e., resolver
277   /// updates, connectivity state notifications, etc); the latter should
278   /// live in the LB policy object itself.
279   class SubchannelPicker : public DualRefCounted<SubchannelPicker> {
280    public:
281     SubchannelPicker();
282 
283     virtual PickResult Pick(PickArgs args) = 0;
284 
285    protected:
Orphaned()286     void Orphaned() override {}
287   };
288 
289   /// A proxy object implemented by the client channel and used by the
290   /// LB policy to communicate with the channel.
291   class ChannelControlHelper {
292    public:
293     ChannelControlHelper() = default;
294     virtual ~ChannelControlHelper() = default;
295 
296     /// Creates a new subchannel with the specified channel args.
297     /// The args and per_address_args will be merged by the channel.
298     virtual RefCountedPtr<SubchannelInterface> CreateSubchannel(
299         const grpc_resolved_address& address,
300         const ChannelArgs& per_address_args, const ChannelArgs& args) = 0;
301 
302     /// Sets the connectivity state and returns a new picker to be used
303     /// by the client channel.
304     virtual void UpdateState(grpc_connectivity_state state,
305                              const absl::Status& status,
306                              RefCountedPtr<SubchannelPicker> picker) = 0;
307 
308     /// Requests that the resolver re-resolve.
309     virtual void RequestReresolution() = 0;
310 
311     /// Returns the channel target.
312     virtual absl::string_view GetTarget() = 0;
313 
314     /// Returns the channel authority.
315     virtual absl::string_view GetAuthority() = 0;
316 
317     /// Returns the channel credentials from the parent channel.  This can
318     /// be used to create a control-plane channel inside an LB policy.
319     virtual RefCountedPtr<grpc_channel_credentials> GetChannelCredentials() = 0;
320 
321     /// Returns the UNSAFE ChannelCredentials used to construct the channel,
322     /// including bearer tokens.  LB policies should generally have no use for
323     /// these credentials, and use of them is heavily discouraged.  These must
324     /// be used VERY carefully to avoid sending bearer tokens to untrusted
325     /// servers, as the server could then impersonate the client.  Generally,
326     /// it is safe to use these credentials only when communicating with the
327     /// backends.
328     virtual RefCountedPtr<grpc_channel_credentials>
329     GetUnsafeChannelCredentials() = 0;
330 
331     /// Returns the EventEngine to use for timers and async work.
332     virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0;
333 
334     /// Returns the stats plugin group for reporting metrics.
335     virtual GlobalStatsPluginRegistry::StatsPluginGroup&
336     GetStatsPluginGroup() = 0;
337 
338     /// Adds a trace message associated with the channel.
339     enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR };
340     virtual void AddTraceEvent(TraceSeverity severity,
341                                absl::string_view message) = 0;
342   };
343 
344   class DelegatingChannelControlHelper;
345 
346   template <typename ParentPolicy>
347   class ParentOwningDelegatingChannelControlHelper;
348 
349   /// Interface for configuration data used by an LB policy implementation.
350   /// Individual implementations will create a subclass that adds methods to
351   /// return the parameters they need.
352   class Config : public RefCounted<Config> {
353    public:
354     ~Config() override = default;
355 
356     // Returns the load balancing policy name
357     virtual absl::string_view name() const = 0;
358   };
359 
360   /// Data passed to the UpdateLocked() method when new addresses and
361   /// config are available.
362   struct UpdateArgs {
363     /// A list of endpoints, each with one or more address, or an error
364     /// indicating a failure to obtain the list of addresses.
365     absl::StatusOr<std::shared_ptr<EndpointAddressesIterator>> addresses;
366     /// The LB policy config.
367     RefCountedPtr<Config> config;
368     /// A human-readable note providing context about the name resolution that
369     /// provided this update.  LB policies may wish to include this message
370     /// in RPC failure status messages.  For example, if the update has an
371     /// empty list of addresses, this message might say "no DNS entries
372     /// found for <name>".
373     std::string resolution_note;
374 
375     // TODO(roth): Before making this a public API, find a better
376     // abstraction for representing channel args.
377     ChannelArgs args;
378   };
379 
380   /// Args used to instantiate an LB policy.
381   struct Args {
382     /// The work_serializer under which all LB policy calls will be run.
383     std::shared_ptr<WorkSerializer> work_serializer;
384     /// Channel control helper.
385     /// Note: LB policies MUST NOT call any method on the helper from
386     /// their constructor.
387     std::unique_ptr<ChannelControlHelper> channel_control_helper;
388     /// Channel args.
389     // TODO(roth): Find a better channel args representation for this API.
390     ChannelArgs args;
391   };
392 
393   explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1);
394   ~LoadBalancingPolicy() override;
395 
396   // Not copyable nor movable.
397   LoadBalancingPolicy(const LoadBalancingPolicy&) = delete;
398   LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete;
399 
400   /// Returns the name of the LB policy.
401   virtual absl::string_view name() const = 0;
402 
403   /// Updates the policy with new data from the resolver.  Will be invoked
404   /// immediately after LB policy is constructed, and then again whenever
405   /// the resolver returns a new result.  The returned status indicates
406   /// whether the LB policy accepted the update; if non-OK, informs
407   /// polling-based resolvers that they should go into backoff delay and
408   /// eventually reattempt the resolution.
409   ///
410   /// The first time that UpdateLocked() is called, the LB policy will
411   /// generally not be able to determine the appropriate connectivity
412   /// state by the time UpdateLocked() returns (e.g., it will need to
413   /// wait for connectivity state notifications from each subchannel,
414   /// which will be delivered asynchronously).  In this case, the LB
415   /// policy should not call the helper's UpdateState() method until it
416   /// does have a clear picture of the connectivity state (e.g., it
417   /// should wait for all subchannels to report connectivity state
418   /// before calling the helper's UpdateState() method), although it is
419   /// expected to do so within some short period of time.  The parent of
420   /// the LB policy will assume that the policy's initial state is
421   /// CONNECTING and that picks should be queued.
422   virtual absl::Status UpdateLocked(UpdateArgs) = 0;  // NOLINT
423 
424   /// Tries to enter a READY connectivity state.
425   /// This is a no-op by default, since most LB policies never go into
426   /// IDLE state.
ExitIdleLocked()427   virtual void ExitIdleLocked() {}
428 
429   /// Resets connection backoff.
430   virtual void ResetBackoffLocked() = 0;
431 
interested_parties()432   grpc_pollset_set* interested_parties() const { return interested_parties_; }
433 
434   // Note: This must be invoked while holding the work_serializer.
435   void Orphan() override;
436 
437   // A picker that returns PickResult::Queue for all picks.
438   // Also calls the parent LB policy's ExitIdleLocked() method when the
439   // first pick is seen.
440   class QueuePicker final : public SubchannelPicker {
441    public:
QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)442     explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)
443         : parent_(std::move(parent)) {}
444 
~QueuePicker()445     ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); }
446 
447     PickResult Pick(PickArgs args) override;
448 
449    private:
450     Mutex mu_;
451     RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_);
452   };
453 
454   // A picker that returns PickResult::Fail for all picks.
455   class TransientFailurePicker final : public SubchannelPicker {
456    public:
TransientFailurePicker(absl::Status status)457     explicit TransientFailurePicker(absl::Status status) : status_(status) {}
458 
Pick(PickArgs)459     PickResult Pick(PickArgs /*args*/) override {
460       return PickResult::Fail(status_);
461     }
462 
463    private:
464     absl::Status status_;
465   };
466 
467   // A picker that returns PickResult::Drop for all picks.
468   class DropPicker final : public SubchannelPicker {
469    public:
DropPicker(absl::Status status)470     explicit DropPicker(absl::Status status) : status_(status) {}
471 
Pick(PickArgs)472     PickResult Pick(PickArgs /*args*/) override {
473       return PickResult::Drop(status_);
474     }
475 
476    private:
477     absl::Status status_;
478   };
479 
480  protected:
work_serializer()481   std::shared_ptr<WorkSerializer> work_serializer() const {
482     return work_serializer_;
483   }
484 
channel_args()485   const ChannelArgs& channel_args() const { return channel_args_; }
486 
487   // Note: LB policies MUST NOT call any method on the helper from their
488   // constructor.
channel_control_helper()489   ChannelControlHelper* channel_control_helper() const {
490     return channel_control_helper_.get();
491   }
492 
493   /// Shuts down the policy.
494   virtual void ShutdownLocked() = 0;
495 
496  private:
497   /// Work Serializer under which LB policy actions take place.
498   std::shared_ptr<WorkSerializer> work_serializer_;
499   /// Owned pointer to interested parties in load balancing decisions.
500   grpc_pollset_set* interested_parties_;
501   /// Channel control helper.
502   std::unique_ptr<ChannelControlHelper> channel_control_helper_;
503   /// Channel args passed in.
504   // TODO(roth): Rework Args so that we don't need to capture channel args here.
505   ChannelArgs channel_args_;
506 };
507 
508 template <>
509 struct ArenaContextType<LoadBalancingPolicy::SubchannelCallTrackerInterface> {
510   static void Destroy(LoadBalancingPolicy::SubchannelCallTrackerInterface*) {}
511 };
512 
513 }  // namespace grpc_core
514 
515 #endif  // GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
516