1 //
2 // Copyright 2022 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h"
18
19 #include <grpc/support/port_platform.h>
20
21 #include <algorithm>
22 #include <cmath>
23 #include <limits>
24 #include <utility>
25 #include <vector>
26
27 #include "absl/functional/any_invocable.h"
28 #include "absl/log/check.h"
29
30 namespace grpc_core {
31
namespace {

// Integer weight given to the heaviest backend after scaling; this is also the
// largest value a uint16_t weight can hold.
constexpr uint16_t kMaxWeight = std::numeric_limits<uint16_t>::max();

// Upper clamp, expressed relative to the mean. With M denoting the mean of all
// known weights, StaticStrideScheduler lowers every known weight that exceeds
// M*kMaxRatio down to M*kMaxRatio.
//
// The purpose is to bound the number of rounds a pick may need.
constexpr double kMaxRatio = 10.0;

// Lower clamp, expressed relative to the mean. With M denoting the mean of all
// known weights, StaticStrideScheduler raises every known weight that is below
// M*kMinRatio up to M*kMinRatio.
//
// This is a performance optimization for an edge case: channels with large
// weights are non-accepting (so WeightedRoundRobin keeps retrying them), while
// channels with near-zero weights might be accepting. Without kMinRatio,
// WeightedRoundRobin could need thousands of picks before it reaches a single
// channel with a near-zero weight. This was part of what happened in
// b/276292666.
//
// The value 0.01 was chosen without any experiments. It should keep
// WeightedRoundRobin from doing much more than on the order of 100 picks of
// non-accepting, high-weight channels in such corner cases. The cost is that
// WeightedRoundRobin sends slightly more than zero requests to potentially
// very bad tasks (those that would otherwise have near-zero weights). That is
// not necessarily a downside; it may not be a problem at all, in which case
// this value could be increased (to 0.05 or 0.1) to save CPU cycles.
//
// Note that this class treats a weight that is exactly zero as unknown, and
// replaces it with M. That behavior is sensible on its own (fresh channels
// without feedback information get an average flow of requests). The
// consequence, however, is that weight = 0 becomes M while weight = epsilon
// becomes M*kMinRatio, and this step function is logically faulty. One
// demonstration is that the function that computes weights in
// WeightedRoundRobin
// (http://google3/production/rpc/stubs/core/loadbalancing/weightedroundrobin.cc;l=324-325;rcl=514986476)
// will cap some epsilon values to zero. There should be a clear distinction
// between "task is new, weight is unknown" and "task is unhealthy, weight is
// very low". A better solution would be to stop folding "unknown" and "weight"
// into a single value and instead represent weights as std::optional<float>
// or, if memory usage is a concern, use NaN as the marker for an unknown
// weight.
constexpr double kMinRatio = 0.01;

}  // namespace
78
Make(absl::Span<const float> float_weights,absl::AnyInvocable<uint32_t ()> next_sequence_func)79 absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
80 absl::Span<const float> float_weights,
81 absl::AnyInvocable<uint32_t()> next_sequence_func) {
82 if (float_weights.empty()) return absl::nullopt;
83 if (float_weights.size() == 1) return absl::nullopt;
84
85 // TODO(b/190488683): should we normalize negative weights to 0?
86
87 const size_t n = float_weights.size();
88 size_t num_zero_weight_channels = 0;
89 double sum = 0;
90 float unscaled_max = 0;
91 for (const float weight : float_weights) {
92 sum += weight;
93 unscaled_max = std::max(unscaled_max, weight);
94 if (weight == 0) {
95 ++num_zero_weight_channels;
96 }
97 }
98
99 if (num_zero_weight_channels == n) return absl::nullopt;
100
101 // Mean of non-zero weights before scaling to `kMaxWeight`.
102 const double unscaled_mean =
103 sum / static_cast<double>(n - num_zero_weight_channels);
104 const double ratio = unscaled_max / unscaled_mean;
105
106 // Adjust max value such that ratio does not exceed kMaxRatio. This should
107 // ensure that we on average do at most kMaxRatio rounds for picks.
108 if (ratio > kMaxRatio) {
109 unscaled_max = kMaxRatio * unscaled_mean;
110 }
111
112 // Scale weights such that the largest is equal to `kMaxWeight`. This should
113 // be accurate enough once we convert to an integer. Quantisation errors won't
114 // be measurable on borg.
115 // TODO(b/190488683): it may be more stable over updates if we try to keep
116 // `scaling_factor` consistent, and only change it when we can't accurately
117 // represent the new weights.
118 const double scaling_factor = kMaxWeight / unscaled_max;
119
120 // Note that since we cap the weights to stay within kMaxRatio, `mean` might
121 // not match the actual mean of the values that end up in the scheduler.
122 const uint16_t mean = std::lround(scaling_factor * unscaled_mean);
123
124 // We compute weight_lower_bound and cap it to 1 from below so that in the
125 // worst case we represent tiny weights as 1 but not as 0 (which would cause
126 // an infinite loop as in b/276292666). This capping to 1 is probably only
127 // useful in case someone misconfigures kMinRatio to be very small.
128 //
129 // NOMUTANTS -- We have tests for this expression, but they are not precise
130 // enough to catch errors of plus/minus 1, what mutation testing does.
131 const uint16_t weight_lower_bound =
132 std::max(static_cast<uint16_t>(1),
133 static_cast<uint16_t>(std::lround(mean * kMinRatio)));
134
135 std::vector<uint16_t> weights;
136 weights.reserve(n);
137 for (size_t i = 0; i < n; ++i) {
138 if (float_weights[i] == 0) { // Weight is unknown.
139 weights.push_back(mean);
140 } else {
141 const double float_weight_capped_from_above =
142 std::min(float_weights[i], unscaled_max);
143 const uint16_t weight =
144 std::lround(float_weight_capped_from_above * scaling_factor);
145 weights.push_back(std::max(weight, weight_lower_bound));
146 }
147 }
148
149 CHECK(weights.size() == float_weights.size());
150 return StaticStrideScheduler{std::move(weights),
151 std::move(next_sequence_func)};
152 }
153
StaticStrideScheduler(std::vector<uint16_t> weights,absl::AnyInvocable<uint32_t ()> next_sequence_func)154 StaticStrideScheduler::StaticStrideScheduler(
155 std::vector<uint16_t> weights,
156 absl::AnyInvocable<uint32_t()> next_sequence_func)
157 : next_sequence_func_(std::move(next_sequence_func)),
158 weights_(std::move(weights)) {
159 CHECK(next_sequence_func_ != nullptr);
160 }
161
Pick() const162 size_t StaticStrideScheduler::Pick() const {
163 while (true) {
164 const uint32_t sequence = next_sequence_func_();
165
166 // The sequence number is split in two: the lower %n gives the index of the
167 // backend, and the rest gives the number of times we've iterated through
168 // all backends. `generation` is used to deterministically decide whether
169 // we pick or skip the backend on this iteration, in proportion to the
170 // backend's weight.
171 const uint64_t backend_index = sequence % weights_.size();
172 const uint64_t generation = sequence / weights_.size();
173 const uint64_t weight = weights_[backend_index];
174
175 // We pick a backend `weight` times per `kMaxWeight` generations. The
176 // multiply and modulus ~evenly spread out the picks for a given backend
177 // between different generations. The offset by `backend_index` helps to
178 // reduce the chance of multiple consecutive non-picks: if we have two
179 // consecutive backends with an equal, say, 80% weight of the max, with no
180 // offset we would see 1/5 generations that skipped both.
181 // TODO(b/190488683): add test for offset efficacy.
182 const uint16_t kOffset = kMaxWeight / 2;
183 const uint16_t mod =
184 (weight * generation + backend_index * kOffset) % kMaxWeight;
185
186 if (mod < kMaxWeight - weight) {
187 // Probability of skipping = 1 - mean(weights) / max(weights).
188 // For a typical large-scale service using RR, max task utilization will
189 // be ~100% when mean utilization is ~80%. So ~20% of picks will be
190 // skipped.
191 continue;
192 }
193 return backend_index;
194 }
195 }
196
197 } // namespace grpc_core
198