/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef RUY_RUY_CPU_CACHE_PARAMS_H_
#define RUY_RUY_CPU_CACHE_PARAMS_H_

namespace ruy {

// Holds some information about a CPU's data caches.
//
// Meaning of 'local': a 'local' cache means a cache that is used by only one
// CPU core, not shared with other cores. It might still be used by multiple
// 'processors' in case of SMT as in Intel HyperThreading. CPUs often have
// multiple levels of local cache, e.g. L1 and L2. We typically return the
// larger one, the assumption being that even the larger one has substantially
// lower latency than any higher (non-local) cache, however as noted below (*)
// the implementation may choose to ignore a cache level.
//
// Meaning of 'last level': this refers to some higher cache level, typically
// shared among multiple CPU cores, so we considered using the terminology
// 'shared' instead of 'last_level'. However that created some confusion of its
// own, as the meaning of 'shared' varies between CPUs, with some CPUs not
// having any level of cache shared among all cores. That is why we stick with
// the 'last_level' terminology, however with the following caveats:
//   1. As noted below (*) the implementation may choose to ignore a cache
//      level, which could cause the 'last level' cache according to ruy not to
//      be the actual last level.
//   2. On some systems-on-chip there is a 'last level' cache outside of the
//      last level cache in the CPU complex. Ruy is not currently doing anything
//      specific regarding such caches.
//   3. We haven't figured out how to amend our terminology to be meaningful
//      on NUMA architectures. NUMA hasn't been part of ruy's scope so far.
//
// (*) Note on ignoring certain cache levels:
// The implementation may choose to ignore a cache if it's suspected not to
// have compelling performance. This is true about all cache levels, but more
// likely regarding the 'last level' cache. For example, a L4 cache may be
// ignored if we believe that it's not the right latency/size compromise for us,
// so on such a CPU, the L3 cache may be used as the 'last level' cache instead.
//
// (**) Note on CPUs with heterogeneous cores:
// Some CPUs have multiple cores with different local caches. For example, some
// ARM big.LITTLE CPUs have some CPU cores with L1=32k and L2=128k, and some
// other CPU cores with L1=64k and L2=256k or even 512k. On such CPUs, the
// fields in this struct refer to the minimum value over all cores. In other
// words, we use conservative values that do not risk over-estimating local
// cache sizes in case of a migration of our threads to smaller cores.
//
// Example:
// On a Qualcomm S855 SoC, there are 8 CPU cores. Each core has L1 and L2 data
// caches local to it:
//   - 4 cores have L1=32k, L2=128k.
//   - 3 cores have L1=64k, L2=256k.
//   - 1 core has L1=64k, L2=512k.
// All 8 cores share a L3 cache of size 2M, and there is beyond that a SoC-level
// cache of size 3M.
// On such a system, we should have:
//   - local_cache_size=128k, the smallest L2 size.
//   - last_level_cache_size=2M, the L3 cache size, ignoring the SoC-level cache.
struct CpuCacheParams final {
  // Minimum value (see (**)), over all cores, of the size in bytes of its local
  // cache (see "Meaning of 'local'"). Default-initialized to 0.
  int local_cache_size = 0;
  // Minimum value (see (**)), over all cores, of the size in bytes of its last
  // level cache (see "Meaning of 'last level'"). Default-initialized to 0.
  int last_level_cache_size = 0;
};

}  // namespace ruy

#endif  // RUY_RUY_CPU_CACHE_PARAMS_H_