1 // Copyright 2022 gRPC authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef GRPC_SRC_CORE_LIB_GPRPP_PER_CPU_H 16 #define GRPC_SRC_CORE_LIB_GPRPP_PER_CPU_H 17 18 #include <grpc/support/port_platform.h> 19 20 #include <stdint.h> 21 22 #include <algorithm> 23 #include <cstddef> 24 #include <limits> 25 #include <memory> 26 27 #include <grpc/support/cpu.h> 28 29 // Sharded collections of objects 30 // This used to be per-cpu, now it's much less so - but still a way to limit 31 // contention. 32 33 namespace grpc_core { 34 35 class PerCpuOptions { 36 public: 37 // Set the number of cpus that colocate on the same shard SetCpusPerShard(size_t cpus_per_shard)38 PerCpuOptions SetCpusPerShard(size_t cpus_per_shard) { 39 cpus_per_shard_ = std::max<size_t>(1, cpus_per_shard); 40 return *this; 41 } 42 43 // Set the maximum number of allowable shards SetMaxShards(size_t max_shards)44 PerCpuOptions SetMaxShards(size_t max_shards) { 45 max_shards_ = std::max<size_t>(1, max_shards); 46 return *this; 47 } 48 cpus_per_shard()49 size_t cpus_per_shard() const { return cpus_per_shard_; } max_shards()50 size_t max_shards() const { return max_shards_; } 51 52 size_t Shards(); 53 size_t ShardsForCpuCount(size_t cpu_count); 54 55 private: 56 size_t cpus_per_shard_ = 1; 57 size_t max_shards_ = std::numeric_limits<size_t>::max(); 58 }; 59 60 class PerCpuShardingHelper { 61 public: GetShardingBits()62 size_t GetShardingBits() { 63 // We periodically refresh the last seen cpu to try to ensure that we spread 64 // load evenly over all shards of a per-cpu data structure, even in the 65 // event of shifting thread distributions, load patterns. 66 // Ideally we'd just call gpr_cpu_current_cpu() every call of this function 67 // to get perfect distribution, but that function is currently quite slow on 68 // some platforms and so we need to cache it somewhat. 69 if (GPR_UNLIKELY(state_.uses_until_refresh == 0)) state_ = State(); 70 --state_.uses_until_refresh; 71 return state_.last_seen_cpu; 72 } 73 74 private: 75 struct State { 76 uint16_t last_seen_cpu = gpr_cpu_current_cpu(); 77 uint16_t uses_until_refresh = 65535; 78 }; 79 static thread_local State state_; 80 }; 81 82 template <typename T> 83 class PerCpu { 84 public: 85 // Options are not defaulted to try and force consideration of what the 86 // options specify. PerCpu(PerCpuOptions options)87 explicit PerCpu(PerCpuOptions options) : shards_(options.Shards()) {} 88 this_cpu()89 T& this_cpu() { return data_[sharding_helper_.GetShardingBits() % shards_]; } 90 begin()91 T* begin() { return data_.get(); } end()92 T* end() { return data_.get() + shards_; } begin()93 const T* begin() const { return data_.get(); } end()94 const T* end() const { return data_.get() + shards_; } 95 96 private: 97 PerCpuShardingHelper sharding_helper_; 98 const size_t shards_; 99 std::unique_ptr<T[]> data_{new T[shards_]}; 100 }; 101 102 } // namespace grpc_core 103 104 #endif // GRPC_SRC_CORE_LIB_GPRPP_PER_CPU_H 105