1 /* 2 * 3 * Copyright 2018 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 #ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 20 #define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 21 22 #include <grpc/support/port_platform.h> 23 24 #include <memory> 25 #include <set> 26 #include <unordered_map> 27 28 #include <grpc/support/log.h> 29 #include <grpcpp/impl/codegen/config.h> 30 31 #include "src/cpp/server/load_reporter/constants.h" 32 33 namespace grpc { 34 namespace load_reporter { 35 36 // The load data storage is organized in hierarchy. The LoadDataStore is the 37 // top-level data store. In LoadDataStore, for each host we keep a 38 // PerHostStore, in which for each balancer we keep a PerBalancerStore. Each 39 // PerBalancerStore maintains a map of load records, mapping from LoadRecordKey 40 // to LoadRecordValue. The LoadRecordValue contains a map of customized call 41 // metrics, mapping from a call metric name to the CallMetricValue. 42 43 // The value of a customized call metric. 44 class CallMetricValue { 45 public: 46 explicit CallMetricValue(uint64_t num_calls = 0, 47 double total_metric_value = 0) num_calls_(num_calls)48 : num_calls_(num_calls), total_metric_value_(total_metric_value) {} 49 MergeFrom(CallMetricValue other)50 void MergeFrom(CallMetricValue other) { 51 num_calls_ += other.num_calls_; 52 total_metric_value_ += other.total_metric_value_; 53 } 54 55 // Getters. num_calls()56 uint64_t num_calls() const { return num_calls_; } total_metric_value()57 double total_metric_value() const { return total_metric_value_; } 58 59 private: 60 // The number of calls that finished with this metric. 61 uint64_t num_calls_ = 0; 62 // The sum of metric values across all the calls that finished with this 63 // metric. 64 double total_metric_value_ = 0; 65 }; 66 67 // The key of a load record. 68 class LoadRecordKey { 69 public: LoadRecordKey(std::string lb_id,std::string lb_tag,std::string user_id,std::string client_ip_hex)70 LoadRecordKey(std::string lb_id, std::string lb_tag, std::string user_id, 71 std::string client_ip_hex) 72 : lb_id_(std::move(lb_id)), 73 lb_tag_(std::move(lb_tag)), 74 user_id_(std::move(user_id)), 75 client_ip_hex_(std::move(client_ip_hex)) {} 76 77 // Parses the input client_ip_and_token to set client IP, LB ID, and LB tag. 78 LoadRecordKey(const std::string& client_ip_and_token, std::string user_id); 79 ToString()80 std::string ToString() const { 81 return "[lb_id_=" + lb_id_ + ", lb_tag_=" + lb_tag_ + 82 ", user_id_=" + user_id_ + ", client_ip_hex_=" + client_ip_hex_ + 83 "]"; 84 } 85 86 bool operator==(const LoadRecordKey& other) const { 87 return lb_id_ == other.lb_id_ && lb_tag_ == other.lb_tag_ && 88 user_id_ == other.user_id_ && client_ip_hex_ == other.client_ip_hex_; 89 } 90 91 // Gets the client IP bytes in network order (i.e., big-endian). 92 std::string GetClientIpBytes() const; 93 94 // Getters. lb_id()95 const std::string& lb_id() const { return lb_id_; } lb_tag()96 const std::string& lb_tag() const { return lb_tag_; } user_id()97 const std::string& user_id() const { return user_id_; } client_ip_hex()98 const std::string& client_ip_hex() const { return client_ip_hex_; } 99 100 struct Hasher { hash_combineHasher101 void hash_combine(size_t* seed, const std::string& k) const { 102 *seed ^= std::hash<std::string>()(k) + 0x9e3779b9 + (*seed << 6) + 103 (*seed >> 2); 104 } 105 operatorHasher106 size_t operator()(const LoadRecordKey& k) const { 107 size_t h = 0; 108 hash_combine(&h, k.lb_id_); 109 hash_combine(&h, k.lb_tag_); 110 hash_combine(&h, k.user_id_); 111 hash_combine(&h, k.client_ip_hex_); 112 return h; 113 } 114 }; 115 116 private: 117 std::string lb_id_; 118 std::string lb_tag_; 119 std::string user_id_; 120 std::string client_ip_hex_; 121 }; 122 123 // The value of a load record. 124 class LoadRecordValue { 125 public: 126 explicit LoadRecordValue(uint64_t start_count = 0, uint64_t ok_count = 0, 127 uint64_t error_count = 0, uint64_t bytes_sent = 0, 128 uint64_t bytes_recv = 0, uint64_t latency_ms = 0) start_count_(start_count)129 : start_count_(start_count), 130 ok_count_(ok_count), 131 error_count_(error_count), 132 bytes_sent_(bytes_sent), 133 bytes_recv_(bytes_recv), 134 latency_ms_(latency_ms) {} 135 136 LoadRecordValue(std::string metric_name, uint64_t num_calls, 137 double total_metric_value); 138 MergeFrom(const LoadRecordValue & other)139 void MergeFrom(const LoadRecordValue& other) { 140 start_count_ += other.start_count_; 141 ok_count_ += other.ok_count_; 142 error_count_ += other.error_count_; 143 bytes_sent_ += other.bytes_sent_; 144 bytes_recv_ += other.bytes_recv_; 145 latency_ms_ += other.latency_ms_; 146 for (const auto& p : other.call_metrics_) { 147 const std::string& key = p.first; 148 const CallMetricValue& value = p.second; 149 call_metrics_[key].MergeFrom(value); 150 } 151 } 152 GetNumCallsInProgressDelta()153 int64_t GetNumCallsInProgressDelta() const { 154 return static_cast<int64_t>(start_count_ - ok_count_ - error_count_); 155 } 156 ToString()157 std::string ToString() const { 158 return "[start_count_=" + std::to_string(start_count_) + 159 ", ok_count_=" + std::to_string(ok_count_) + 160 ", error_count_=" + std::to_string(error_count_) + 161 ", bytes_sent_=" + std::to_string(bytes_sent_) + 162 ", bytes_recv_=" + std::to_string(bytes_recv_) + 163 ", latency_ms_=" + std::to_string(latency_ms_) + ", " + 164 std::to_string(call_metrics_.size()) + " other call metric(s)]"; 165 } 166 InsertCallMetric(const std::string & metric_name,const CallMetricValue & metric_value)167 bool InsertCallMetric(const std::string& metric_name, 168 const CallMetricValue& metric_value) { 169 return call_metrics_.insert({metric_name, metric_value}).second; 170 } 171 172 // Getters. start_count()173 uint64_t start_count() const { return start_count_; } ok_count()174 uint64_t ok_count() const { return ok_count_; } error_count()175 uint64_t error_count() const { return error_count_; } bytes_sent()176 uint64_t bytes_sent() const { return bytes_sent_; } bytes_recv()177 uint64_t bytes_recv() const { return bytes_recv_; } latency_ms()178 uint64_t latency_ms() const { return latency_ms_; } call_metrics()179 const std::unordered_map<std::string, CallMetricValue>& call_metrics() const { 180 return call_metrics_; 181 } 182 183 private: 184 uint64_t start_count_ = 0; 185 uint64_t ok_count_ = 0; 186 uint64_t error_count_ = 0; 187 uint64_t bytes_sent_ = 0; 188 uint64_t bytes_recv_ = 0; 189 uint64_t latency_ms_ = 0; 190 std::unordered_map<std::string, CallMetricValue> call_metrics_; 191 }; 192 193 // Stores the data associated with a particular LB ID. 194 class PerBalancerStore { 195 public: 196 using LoadRecordMap = 197 std::unordered_map<LoadRecordKey, LoadRecordValue, LoadRecordKey::Hasher>; 198 PerBalancerStore(std::string lb_id,std::string load_key)199 PerBalancerStore(std::string lb_id, std::string load_key) 200 : lb_id_(std::move(lb_id)), load_key_(std::move(load_key)) {} 201 202 // Merge a load record with the given key and value if the store is not 203 // suspended. 204 void MergeRow(const LoadRecordKey& key, const LoadRecordValue& value); 205 206 // Suspend this store, so that no detailed load data will be recorded. 207 void Suspend(); 208 // Resume this store from suspension. 209 void Resume(); 210 // Is this store suspended or not? IsSuspended()211 bool IsSuspended() const { return suspended_; } 212 IsNumCallsInProgressChangedSinceLastReport()213 bool IsNumCallsInProgressChangedSinceLastReport() const { 214 return num_calls_in_progress_ != last_reported_num_calls_in_progress_; 215 } 216 217 uint64_t GetNumCallsInProgressForReport(); 218 ToString()219 std::string ToString() { 220 return "[PerBalancerStore lb_id_=" + lb_id_ + " load_key_=" + load_key_ + 221 "]"; 222 } 223 ClearLoadRecordMap()224 void ClearLoadRecordMap() { load_record_map_.clear(); } 225 226 // Getters. lb_id()227 const std::string& lb_id() const { return lb_id_; } load_key()228 const std::string& load_key() const { return load_key_; } load_record_map()229 const LoadRecordMap& load_record_map() const { return load_record_map_; } 230 231 private: 232 std::string lb_id_; 233 // TODO(juanlishen): Use bytestring protobuf type? 234 std::string load_key_; 235 LoadRecordMap load_record_map_; 236 uint64_t num_calls_in_progress_ = 0; 237 uint64_t last_reported_num_calls_in_progress_ = 0; 238 bool suspended_ = false; 239 }; 240 241 // Stores the data associated with a particular host. 242 class PerHostStore { 243 public: 244 // When a report stream is created, a PerBalancerStore is created for the 245 // LB ID (guaranteed unique) associated with that stream. If it is the only 246 // active store, adopt all the orphaned stores. If it is the first created 247 // store, adopt the store of kInvalidLbId. 248 void ReportStreamCreated(const std::string& lb_id, 249 const std::string& load_key); 250 251 // When a report stream is closed, the PerBalancerStores assigned to the 252 // associate LB ID need to be re-assigned to other active balancers, 253 // ideally with the same load key. If there is no active balancer, we have 254 // to suspend those stores and drop the incoming load data until they are 255 // resumed. 256 void ReportStreamClosed(const std::string& lb_id); 257 258 // Returns null if not found. Caller doesn't own the returned store. 259 PerBalancerStore* FindPerBalancerStore(const std::string& lb_id) const; 260 261 // Returns null if lb_id is not found. The returned pointer points to the 262 // underlying data structure, which is not owned by the caller. 263 const std::set<PerBalancerStore*>* GetAssignedStores( 264 const std::string& lb_id) const; 265 266 private: 267 // Creates a PerBalancerStore for the given LB ID, assigns the store to 268 // itself, and records the LB ID to the load key. 269 void SetUpForNewLbId(const std::string& lb_id, const std::string& load_key); 270 271 void AssignOrphanedStore(PerBalancerStore* orphaned_store, 272 const std::string& new_receiver); 273 274 std::unordered_map<std::string, std::set<std::string>> 275 load_key_to_receiving_lb_ids_; 276 277 // Key: LB ID. The key set includes all the LB IDs that have been 278 // allocated for reporting streams so far. 279 // Value: the unique pointer to the PerBalancerStore of the LB ID. 280 std::unordered_map<std::string, std::unique_ptr<PerBalancerStore>> 281 per_balancer_stores_; 282 283 // Key: LB ID. The key set includes the LB IDs of the balancers that are 284 // currently receiving report. 285 // Value: the set of raw pointers to the PerBalancerStores assigned to the LB 286 // ID. Note that the sets in assigned_stores_ form a division of the value set 287 // of per_balancer_stores_. 288 std::unordered_map<std::string, std::set<PerBalancerStore*>> assigned_stores_; 289 }; 290 291 // Thread-unsafe two-level bookkeeper of all the load data. 292 // Note: We never remove any store objects from this class, as per the 293 // current spec. That's because premature removal of the store objects 294 // may lead to loss of critical information, e.g., mapping from lb_id to 295 // load_key, and the number of in-progress calls. Such loss will cause 296 // information inconsistency when the balancer is re-connected. Keeping 297 // all the stores should be fine for PerHostStore, since we assume there 298 // should only be a few hostnames. But it's a potential problem for 299 // PerBalancerStore. 300 class LoadDataStore { 301 public: 302 // Returns null if not found. Caller doesn't own the returned store. 303 PerBalancerStore* FindPerBalancerStore(const std::string& hostname, 304 const std::string& lb_id) const; 305 306 // Returns null if hostname or lb_id is not found. The returned pointer points 307 // to the underlying data structure, which is not owned by the caller. 308 const std::set<PerBalancerStore*>* GetAssignedStores(const string& hostname, 309 const string& lb_id); 310 311 // If a PerBalancerStore can be found by the hostname and LB ID in 312 // LoadRecordKey, the load data will be merged to that store. Otherwise, 313 // only track the number of the in-progress calls for this unknown LB ID. 314 void MergeRow(const std::string& hostname, const LoadRecordKey& key, 315 const LoadRecordValue& value); 316 317 // Is the given lb_id a tracked unknown LB ID (i.e., the LB ID was associated 318 // with some received load data but unknown to this load data store)? IsTrackedUnknownBalancerId(const std::string & lb_id)319 bool IsTrackedUnknownBalancerId(const std::string& lb_id) const { 320 return unknown_balancer_id_trackers_.find(lb_id) != 321 unknown_balancer_id_trackers_.end(); 322 } 323 324 // Wrapper around PerHostStore::ReportStreamCreated. 325 void ReportStreamCreated(const std::string& hostname, 326 const std::string& lb_id, 327 const std::string& load_key); 328 329 // Wrapper around PerHostStore::ReportStreamClosed. 330 void ReportStreamClosed(const std::string& hostname, 331 const std::string& lb_id); 332 333 private: 334 // Buffered data that was fetched from Census but hasn't been sent to 335 // balancer. We need to keep this data ourselves because Census will 336 // delete the data once it's returned. 337 std::unordered_map<std::string, PerHostStore> per_host_stores_; 338 339 // Tracks the number of in-progress calls for each unknown LB ID. 340 std::unordered_map<std::string, uint64_t> unknown_balancer_id_trackers_; 341 }; 342 343 } // namespace load_reporter 344 } // namespace grpc 345 346 #endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 347