1 // Copyright 2022 gRPC authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef GRPC_SRC_CORE_UTIL_PER_CPU_H 16 #define GRPC_SRC_CORE_UTIL_PER_CPU_H 17 18 #include <grpc/support/cpu.h> 19 #include <grpc/support/port_platform.h> 20 #include <stdint.h> 21 22 #include <algorithm> 23 #include <cstddef> 24 #include <limits> 25 #include <memory> 26 27 // Sharded collections of objects 28 // This used to be per-cpu, now it's much less so - but still a way to limit 29 // contention. 30 31 namespace grpc_core { 32 33 class PerCpuOptions { 34 public: 35 // Set the number of cpus that colocate on the same shard SetCpusPerShard(size_t cpus_per_shard)36 PerCpuOptions SetCpusPerShard(size_t cpus_per_shard) { 37 cpus_per_shard_ = std::max<size_t>(1, cpus_per_shard); 38 return *this; 39 } 40 41 // Set the maximum number of allowable shards SetMaxShards(size_t max_shards)42 PerCpuOptions SetMaxShards(size_t max_shards) { 43 max_shards_ = std::max<size_t>(1, max_shards); 44 return *this; 45 } 46 cpus_per_shard()47 size_t cpus_per_shard() const { return cpus_per_shard_; } max_shards()48 size_t max_shards() const { return max_shards_; } 49 50 size_t Shards(); 51 size_t ShardsForCpuCount(size_t cpu_count); 52 53 private: 54 size_t cpus_per_shard_ = 1; 55 size_t max_shards_ = std::numeric_limits<size_t>::max(); 56 }; 57 58 class PerCpuShardingHelper { 59 public: GetShardingBits()60 size_t GetShardingBits() { 61 // We periodically refresh the last seen cpu to try to ensure that we spread 62 // load evenly over all shards of a per-cpu data structure, even in the 63 // event of shifting thread distributions, load patterns. 64 // Ideally we'd just call gpr_cpu_current_cpu() every call of this function 65 // to get perfect distribution, but that function is currently quite slow on 66 // some platforms and so we need to cache it somewhat. 67 if (GPR_UNLIKELY(state_.uses_until_refresh == 0)) state_ = State(); 68 --state_.uses_until_refresh; 69 return state_.last_seen_cpu; 70 } 71 72 private: 73 struct State { 74 uint16_t last_seen_cpu = gpr_cpu_current_cpu(); 75 uint16_t uses_until_refresh = 65535; 76 }; 77 static thread_local State state_; 78 }; 79 80 template <typename T> 81 class PerCpu { 82 public: 83 // Options are not defaulted to try and force consideration of what the 84 // options specify. PerCpu(PerCpuOptions options)85 explicit PerCpu(PerCpuOptions options) : shards_(options.Shards()) {} 86 this_cpu()87 T& this_cpu() { return data_[sharding_helper_.GetShardingBits() % shards_]; } 88 begin()89 T* begin() { return data_.get(); } end()90 T* end() { return data_.get() + shards_; } begin()91 const T* begin() const { return data_.get(); } end()92 const T* end() const { return data_.get() + shards_; } 93 94 private: 95 PerCpuShardingHelper sharding_helper_; 96 const size_t shards_; 97 std::unique_ptr<T[]> data_{new T[shards_]}; 98 }; 99 100 } // namespace grpc_core 101 102 #endif // GRPC_SRC_CORE_UTIL_PER_CPU_H 103