diff --git CMakeLists.txt CMakeLists.txt index e594def..cab4d05 100644 --- CMakeLists.txt +++ CMakeLists.txt @@ -119,7 +119,8 @@ ENDIF() # ---[ cpuinfo library SET(CPUINFO_SRCS src/init.c - src/api.c) + src/api.c + src/cache.c) IF(CPUINFO_SUPPORTED_PLATFORM) IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$") diff --git LICENSE LICENSE index 4910bfe..3f9a4f0 100644 --- LICENSE +++ LICENSE @@ -1,3 +1,4 @@ +Copyright (c) 2019 Google LLC Copyright (c) 2017-2018 Facebook Inc. Copyright (C) 2012-2017 Georgia Institute of Technology Copyright (C) 2010-2012 Marat Dukhan diff --git include/cpuinfo.h include/cpuinfo.h index 7d5833f..9938d2b 100644 --- include/cpuinfo.h +++ include/cpuinfo.h @@ -38,10 +38,18 @@ #define CPUINFO_ARCH_PNACL 1 #endif -#if defined(EMSCRIPTEN) +#if defined(__asmjs__) #define CPUINFO_ARCH_ASMJS 1 #endif +#if defined(__wasm__) + #if defined(__wasm_simd128__) + #define CPUINFO_ARCH_WASMSIMD 1 + #else + #define CPUINFO_ARCH_WASM 1 + #endif +#endif + #if CPUINFO_ARCH_X86 && defined(_MSC_VER) #define CPUINFO_ABI __cdecl #elif CPUINFO_ARCH_X86 && defined(__GNUC__) @@ -80,6 +88,14 @@ #define CPUINFO_ARCH_ASMJS 0 #endif +#ifndef CPUINFO_ARCH_WASM + #define CPUINFO_ARCH_WASM 0 +#endif + +#ifndef CPUINFO_ARCH_WASMSIMD + #define CPUINFO_ARCH_WASMSIMD 0 +#endif + #define CPUINFO_CACHE_UNIFIED 0x00000001 #define CPUINFO_CACHE_INCLUSIVE 0x00000002 #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 @@ -278,10 +294,14 @@ enum cpuinfo_uarch { cpuinfo_uarch_haswell = 0x00100208, /** Intel Broadwell microarchitecture. */ cpuinfo_uarch_broadwell = 0x00100209, - /** Intel Sky Lake microarchitecture. */ + /** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */ cpuinfo_uarch_sky_lake = 0x0010020A, - /** Intel Kaby Lake microarchitecture. */ - cpuinfo_uarch_kaby_lake = 0x0010020B, + /** DEPRECATED (Intel Kaby Lake microarchitecture). 
*/ + cpuinfo_uarch_kaby_lake = 0x0010020A, + /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */ + cpuinfo_uarch_palm_cove = 0x0010020B, + /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ + cpuinfo_uarch_sunny_cove = 0x0010020C, /** Pentium 4 with Willamette, Northwood, or Foster cores. */ cpuinfo_uarch_willamette = 0x00100300, @@ -289,13 +309,17 @@ enum cpuinfo_uarch { cpuinfo_uarch_prescott = 0x00100301, /** Intel Atom on 45 nm process. */ - cpuinfo_uarch_bonnell = 0x00100400, + cpuinfo_uarch_bonnell = 0x00100400, /** Intel Atom on 32 nm process. */ - cpuinfo_uarch_saltwell = 0x00100401, + cpuinfo_uarch_saltwell = 0x00100401, /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ - cpuinfo_uarch_silvermont = 0x00100402, + cpuinfo_uarch_silvermont = 0x00100402, /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ - cpuinfo_uarch_airmont = 0x00100403, + cpuinfo_uarch_airmont = 0x00100403, + /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */ + cpuinfo_uarch_goldmont = 0x00100404, + /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ + cpuinfo_uarch_goldmont_plus = 0x00100405, /** Intel Knights Ferry HPC boards. */ cpuinfo_uarch_knights_ferry = 0x00100500, @@ -335,8 +359,10 @@ enum cpuinfo_uarch { cpuinfo_uarch_steamroller = 0x00200107, /** AMD Excavator microarchitecture (Carizzo APUs). */ cpuinfo_uarch_excavator = 0x00200108, - /** AMD Zen microarchitecture (Ryzen CPUs). */ + /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */ cpuinfo_uarch_zen = 0x00200109, + /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen2 = 0x0020010A, /** NSC Geode and AMD Geode GX and LX. */ cpuinfo_uarch_geode = 0x00200200, @@ -370,23 +396,34 @@ enum cpuinfo_uarch { cpuinfo_uarch_cortex_a17 = 0x00300217, /** ARM Cortex-A32. */ - cpuinfo_uarch_cortex_a32 = 0x00300332, + cpuinfo_uarch_cortex_a32 = 0x00300332, /** ARM Cortex-A35. 
*/ - cpuinfo_uarch_cortex_a35 = 0x00300335, + cpuinfo_uarch_cortex_a35 = 0x00300335, /** ARM Cortex-A53. */ - cpuinfo_uarch_cortex_a53 = 0x00300353, + cpuinfo_uarch_cortex_a53 = 0x00300353, /** ARM Cortex-A55. */ - cpuinfo_uarch_cortex_a55 = 0x00300355, + cpuinfo_uarch_cortex_a55 = 0x00300355, /** ARM Cortex-A57. */ - cpuinfo_uarch_cortex_a57 = 0x00300357, + cpuinfo_uarch_cortex_a57 = 0x00300357, + /** ARM Cortex-A65. */ + cpuinfo_uarch_cortex_a65 = 0x00300365, /** ARM Cortex-A72. */ - cpuinfo_uarch_cortex_a72 = 0x00300372, + cpuinfo_uarch_cortex_a72 = 0x00300372, /** ARM Cortex-A73. */ - cpuinfo_uarch_cortex_a73 = 0x00300373, + cpuinfo_uarch_cortex_a73 = 0x00300373, /** ARM Cortex-A75. */ - cpuinfo_uarch_cortex_a75 = 0x00300375, + cpuinfo_uarch_cortex_a75 = 0x00300375, /** ARM Cortex-A76. */ - cpuinfo_uarch_cortex_a76 = 0x00300376, + cpuinfo_uarch_cortex_a76 = 0x00300376, + /** ARM Cortex-A76AE. */ + cpuinfo_uarch_cortex_a76ae = 0x00300378, + /** ARM Cortex-A77. */ + cpuinfo_uarch_cortex_a77 = 0x00300377, + + /** ARM Neoverse N1. */ + cpuinfo_uarch_neoverse_n1 = 0x00300400, + /** ARM Neoverse E1. */ + cpuinfo_uarch_neoverse_e1 = 0x00300401, /** Qualcomm Scorpion. */ cpuinfo_uarch_scorpion = 0x00400100, @@ -406,12 +443,22 @@ enum cpuinfo_uarch { /** Nvidia Carmel. */ cpuinfo_uarch_carmel = 0x00500102, - /** Samsung Mongoose M1 (Exynos 8890 big cores). */ + /** Samsung Exynos M1 (Exynos 8890 big cores). */ + cpuinfo_uarch_exynos_m1 = 0x00600100, + /** Samsung Exynos M2 (Exynos 8895 big cores). */ + cpuinfo_uarch_exynos_m2 = 0x00600101, + /** Samsung Exynos M3 (Exynos 9810 big cores). */ + cpuinfo_uarch_exynos_m3 = 0x00600102, + /** Samsung Exynos M4 (Exynos 9820 big cores). */ + cpuinfo_uarch_exynos_m4 = 0x00600103, + /** Samsung Exynos M5 (Exynos 9830 big cores). */ + cpuinfo_uarch_exynos_m5 = 0x00600104, + + /* Old names for Exynos. */ cpuinfo_uarch_mongoose_m1 = 0x00600100, - /** Samsung Mongoose M2 (Exynos 8895 big cores). 
*/ cpuinfo_uarch_mongoose_m2 = 0x00600101, - /** Samsung Meerkat M3 (Exynos 9810 big cores). */ cpuinfo_uarch_meerkat_m3 = 0x00600102, + cpuinfo_uarch_meerkat_m4 = 0x00600103, /** Apple A6 and A6X processors. */ cpuinfo_uarch_swift = 0x00700100, @@ -640,6 +687,8 @@ void CPUINFO_ABI cpuinfo_deinitialize(void); bool avx512bitalg; bool avx512vpopcntdq; bool avx512vnni; + bool avx512bf16; + bool avx512vp2intersect; bool avx512_4vnniw; bool avx512_4fmaps; bool hle; @@ -1110,6 +1159,22 @@ static inline bool cpuinfo_has_x86_avx512vnni(void) { #endif } +static inline bool cpuinfo_has_x86_avx512bf16(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bf16; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vp2intersect; + #else + return false; + #endif +} + static inline bool cpuinfo_has_x86_avx512_4vnniw(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 return cpuinfo_isa.avx512_4vnniw; @@ -1682,6 +1747,11 @@ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void); uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); +/** + * Returns upper bound on cache size. 
+ */ +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); + const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void); const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); diff --git src/api.c src/api.c index 98b5805..83744f5 100644 --- src/api.c +++ src/api.c @@ -18,6 +18,7 @@ uint32_t cpuinfo_cores_count = 0; uint32_t cpuinfo_clusters_count = 0; uint32_t cpuinfo_packages_count = 0; uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; +uint32_t cpuinfo_max_cache_size = 0; const struct cpuinfo_processor* cpuinfo_get_processors(void) { diff --git src/arm/api.h src/arm/api.h index 11e588b..69274bc 100644 --- src/arm/api.h +++ src/arm/api.h @@ -104,6 +104,9 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( struct cpuinfo_cache l1d[restrict static 1], struct cpuinfo_cache l2[restrict static 1], struct cpuinfo_cache l3[restrict static 1]); + +CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size( + const struct cpuinfo_processor processor[restrict static 1]); #else /* defined(__cplusplus) */ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, diff --git src/arm/cache.c src/arm/cache.c index 5ada7d9..ccadeb4 100644 --- src/arm/cache.c +++ src/arm/cache.c @@ -1,10 +1,12 @@ #include #include +#include #include #include #include + void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, uint32_t cluster_cores, @@ -109,7 +111,7 @@ void cpuinfo_arm_decode_cache( * memory accesses and has been optimized for use with the Cortex-A5 processor. * 8.1.7. Exclusive L2 cache * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode. - * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. + * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. 
* * +--------------------+-----------+-----------+----------+-----------+ * | Processor model | L1D cache | L1I cache | L2 cache | Reference | @@ -698,7 +700,7 @@ void cpuinfo_arm_decode_cache( * [3] https://en.wikichip.org/wiki/hisilicon/kirin/980 */ if (midr_is_qualcomm_cortex_a55_silver(midr)) { - /* Qualcomm-modified Cortex-A55 in Snapdragon 710 / 845 */ + /* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */ uint32_t l3_size = 1024 * 1024; switch (chipset->series) { case cpuinfo_arm_chipset_series_qualcomm_snapdragon: @@ -827,6 +829,62 @@ void cpuinfo_arm_decode_cache( .flags = CPUINFO_CACHE_INCLUSIVE }; break; + case cpuinfo_uarch_cortex_a65: + { + /* + * ARM Cortex‑A65 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core. + * It consists of separate instruction and data caches. You can configure instruction and data caches + * independently during implementation to sizes of 32KB or 64KB. + * + * L1 instruction-side memory system + * The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are: + * - 64-byte instruction side cache line length. + * - 4-way set associative L1 instruction cache. + * + * L1 data-side memory system + * - 64-byte data side cache line length. + * - 4-way set associative L1 data cache. + * + * A7.1 About the L2 memory system + * The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system. + * The L2 memory subsystem consists of: + * - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB. + * Cache lines have a fixed length of 64 bytes. + * + * The main features of the L2 memory system are: + * - Strictly exclusive with L1 data cache. + * - Pseudo-inclusive with L1 instruction cache. + * - Private per-core unified L2 cache. 
+ */ + const uint32_t l1_size = 32 * 1024; + const uint32_t l2_size = 128 * 1024; + const uint32_t l3_size = 512 * 1024; + *l1i = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 4, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + /* DynamIQ */ + .associativity = 16, + .line_size = 64, + }; + break; + } case cpuinfo_uarch_cortex_a72: { /* @@ -1047,6 +1105,7 @@ void cpuinfo_arm_decode_cache( break; } case cpuinfo_uarch_cortex_a76: + case cpuinfo_uarch_cortex_a76ae: { /* * ARM Cortex-A76 Core Technical Reference Manual @@ -1119,6 +1178,57 @@ void cpuinfo_arm_decode_cache( }; break; } + case cpuinfo_uarch_cortex_a77: + { + /* + * ARM Cortex-A77 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-LRU cache replacement policy. + * + * A7.1 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines + * have a fixed length of 64 bytes. 
+ * - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache. + */ + const uint32_t l2_size = 256 * 1024; + const uint32_t l3_size = 1024 * 1024; + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + .associativity = 16, + .line_size = 64, + }; + break; + } #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) case cpuinfo_uarch_scorpion: /* @@ -1248,8 +1358,8 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; break; - case cpuinfo_uarch_mongoose_m1: - case cpuinfo_uarch_mongoose_m2: + case cpuinfo_uarch_exynos_m1: + case cpuinfo_uarch_exynos_m2: /* * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$, * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1] @@ -1283,7 +1393,7 @@ void cpuinfo_arm_decode_cache( .line_size = 64 }; break; - case cpuinfo_uarch_meerkat_m3: + case cpuinfo_uarch_exynos_m3: /* * +--------------------+-------+-----------+-----------+-----------+----------+------------+ * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | @@ -1294,19 +1404,19 @@ void cpuinfo_arm_decode_cache( * [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results */ *l1i = (struct cpuinfo_cache) { - .size = 64 * 1024 /* assume same as in Mongoose cores */, - .associativity = 4 /* assume same as in Mongoose cores */, - .line_size = 128 /* assume same as in Mongoose cores */ + .size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */, + .associativity = 4 /* assume same as in Exynos M1/M2 cores */, + .line_size = 128 /* assume same as in Exynos M1/M2 cores */ }; *l1d = 
(struct cpuinfo_cache) { .size = 64 * 1024, - .associativity = 8 /* assume same as in Mongoose cores */, - .line_size = 64 /* assume same as in Mongoose cores */, + .associativity = 8 /* assume same as in Exynos M1/M2 cores */, + .line_size = 64 /* assume same as in Exynos M1/M2 cores */, }; *l2 = (struct cpuinfo_cache) { .size = 512 * 1024, - .associativity = 16 /* assume same as in Mongoose cores */, - .line_size = 64 /* assume same as in Mongoose cores */, + .associativity = 16 /* assume same as in Exynos M1/M2 cores */, + .line_size = 64 /* assume same as in Exynos M1/M2 cores */, }; *l3 = (struct cpuinfo_cache) { .size = 4 * 1024 * 1024, @@ -1393,3 +1503,124 @@ void cpuinfo_arm_decode_cache( } } } + +uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) { + /* + * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo + * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum. + */ + switch (processor->core->uarch) { + case cpuinfo_uarch_xscale: + case cpuinfo_uarch_arm11: + case cpuinfo_uarch_scorpion: + case cpuinfo_uarch_krait: + case cpuinfo_uarch_kryo: + case cpuinfo_uarch_exynos_m1: + case cpuinfo_uarch_exynos_m2: + case cpuinfo_uarch_exynos_m3: + /* cpuinfo-detected cache size always correct */ + return cpuinfo_compute_max_cache_size(processor); + case cpuinfo_uarch_cortex_a5: + /* Max observed (NXP Vybrid SoC) */ + return 512 * 1024; + case cpuinfo_uarch_cortex_a7: + /* + * Cortex-A7 MPCore Technical Reference Manual: + * 7.1. About the L2 Memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB. + */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a8: + /* + * Cortex-A8 Technical Reference Manual: + * 8.1. 
About the L2 memory system + * The key features of the L2 memory system include: + * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB + */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a9: + /* Max observed (e.g. Exynos 4212) */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a12: + case cpuinfo_uarch_cortex_a17: + /* + * ARM Cortex-A17 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 Memory system + * The key features of the L2 memory system include: + * - An integrated L2 cache: + * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB. + */ + return 8 * 1024 * 1024; + case cpuinfo_uarch_cortex_a15: + /* + * ARM Cortex-A15 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + */ + return 4 * 1024 * 1024; + case cpuinfo_uarch_cortex_a35: + /* + * ARM Cortex‑A35 Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * L2 cache + * - Further features of the L2 cache are: + * - Configurable size of 128KB, 256KB, 512KB, and 1MB. + */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a53: + /* + * ARM Cortex-A53 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB. + */ + return 2 * 1024 * 1024; + case cpuinfo_uarch_cortex_a57: + /* + * ARM Cortex-A57 MPCore Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, and 2MB. 
+ */ + return 2 * 1024 * 1024; + case cpuinfo_uarch_cortex_a72: + /* + * ARM Cortex-A72 MPCore Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + */ + return 4 * 1024 * 1024; + case cpuinfo_uarch_cortex_a73: + /* + * ARM Cortex‑A73 MPCore Processor Technical Reference Manual + * 7.1 About the L2 memory system + * The L2 memory system consists of: + * - A tightly-integrated L2 cache with: + * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB. + */ + return 8 * 1024 * 1024; + case cpuinfo_uarch_cortex_a55: + case cpuinfo_uarch_cortex_a75: + case cpuinfo_uarch_cortex_a76: + case cpuinfo_uarch_exynos_m4: + default: + /* + * ARM DynamIQ Shared Unit Technical Reference Manual + * 1.3 Implementation options + * L3_CACHE_SIZE + * - 256KB + * - 512KB + * - 1024KB + * - 1536KB + * - 2048KB + * - 3072KB + * - 4096KB + */ + return 4 * 1024 * 1024; + } +} diff --git src/arm/linux/init.c src/arm/linux/init.c index a297f63..f0c432c 100644 --- src/arm/linux/init.c +++ src/arm/linux/init.c @@ -678,6 +678,8 @@ void cpuinfo_arm_linux_init(void) { cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); + __sync_synchronize(); cpuinfo_is_initialized = true; diff --git src/arm/linux/midr.c src/arm/linux/midr.c index 668fc72..2c3116b 100644 --- src/arm/linux/midr.c +++ src/arm/linux/midr.c @@ -220,7 +220,7 @@ static const struct cluster_config cluster_configs[] = { .model = UINT16_C(7420), .clusters = 2, .cluster_cores = { - [0] = 4, + [0] = 4, [1] = 4, }, .cluster_midr = { @@ -229,7 +229,7 @@ static const struct cluster_config cluster_configs[] = { }, }, { - /* Exynos 8890: 4x Mongoose + 4x Cortex-A53 */ + /* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */ .cores = 8, .series = 
cpuinfo_arm_chipset_series_samsung_exynos, .model = UINT16_C(8890), @@ -695,7 +695,7 @@ static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan( if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { if (processors[i].package_leader_id == i) { if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) { - midr = processors[i].midr; + midr = processors[i].midr; } else { cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr); /* To be consistent, we copy the MIDR entirely, rather than by parts */ @@ -836,7 +836,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr( * - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value. * - Clusters following any reported MIDR value to have that MIDR value. */ - + if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset( chipset, clusters_count, cluster_leaders, usable_processors, processors, true)) { diff --git src/arm/mach/init.c src/arm/mach/init.c index 5b14b49..e64cc18 100644 --- src/arm/mach/init.c +++ src/arm/mach/init.c @@ -562,6 +562,8 @@ void cpuinfo_arm_mach_init(void) { cpuinfo_clusters_count = num_clusters; cpuinfo_packages_count = mach_topology.packages; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + __sync_synchronize(); cpuinfo_is_initialized = true; diff --git src/arm/midr.h src/arm/midr.h index 6363ed7..d5a28e3 100644 --- src/arm/midr.h +++ src/arm/midr.h @@ -33,31 +33,31 @@ #define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010) #define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050) #define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110) -#define CPUINFO_ARM_MIDR_MONGOOSE UINT32_C(0x530F0010) +#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010) #define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030) inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) { - return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | + return (midr & 
~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | ((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK); } inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) { - return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | + return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | ((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK); } inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) { - return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | + return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | ((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK); } inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) { - return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | + return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | ((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK); } inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) { - return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | + return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | ((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK); } @@ -171,13 +171,20 @@ inline static bool midr_is_kryo_gold(uint32_t midr) { inline static uint32_t midr_score_core(uint32_t midr) { const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; switch (midr & core_mask) { + case UINT32_C(0x53000040): /* Exynos M5 */ + case UINT32_C(0x53000030): /* Exynos M4 */ + /* These cores are in big role w.r.t Cortex-A75 or Cortex-A76 */ + return 6; case UINT32_C(0x4E000030): /* Denver 2 */ - case UINT32_C(0x53000010): /* Mongoose */ - case UINT32_C(0x53000020): /* Meerkat */ + case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */ + case UINT32_C(0x53000020): /* Exynos M3 */ + case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ case UINT32_C(0x51008020): /* Kryo 385 Gold */ case UINT32_C(0x51008000): /* Kryo 260 / 280 
Gold */ case UINT32_C(0x51002050): /* Kryo Gold */ case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ case UINT32_C(0x4100D0B0): /* Cortex-A76 */ case UINT32_C(0x4100D0A0): /* Cortex-A75 */ case UINT32_C(0x4100D090): /* Cortex-A73 */ @@ -191,12 +198,14 @@ inline static uint32_t midr_score_core(uint32_t midr) { case UINT32_C(0x4100D070): /* Cortex-A57 */ /* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */ return 4; + case UINT32_C(0x4100D060): /* Cortex-A65 */ case UINT32_C(0x4100D050): /* Cortex-A55 */ case UINT32_C(0x4100D030): /* Cortex-A53 */ /* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */ return 2; case UINT32_C(0x4100D040): /* Cortex-A35 */ case UINT32_C(0x4100C070): /* Cortex-A7 */ + case UINT32_C(0x51008050): /* Kryo 485 Silver */ case UINT32_C(0x51008030): /* Kryo 385 Silver */ case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */ case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */ @@ -215,7 +224,7 @@ inline static uint32_t midr_score_core(uint32_t midr) { } inline static uint32_t midr_little_core_for_big(uint32_t midr) { - const uint32_t core_mask = + const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK; switch (midr & core_mask) { case CPUINFO_ARM_MIDR_CORTEX_A75: @@ -223,7 +232,7 @@ inline static uint32_t midr_little_core_for_big(uint32_t midr) { case CPUINFO_ARM_MIDR_CORTEX_A73: case CPUINFO_ARM_MIDR_CORTEX_A72: case CPUINFO_ARM_MIDR_CORTEX_A57: - case CPUINFO_ARM_MIDR_MONGOOSE: + case CPUINFO_ARM_MIDR_EXYNOS_M1_M2: return CPUINFO_ARM_MIDR_CORTEX_A53; case CPUINFO_ARM_MIDR_CORTEX_A17: case CPUINFO_ARM_MIDR_CORTEX_A15: diff --git src/arm/uarch.c src/arm/uarch.c index d7d2c63..a38250a 100644 --- src/arm/uarch.c +++ src/arm/uarch.c @@ -60,6 +60,9 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD05: 
*uarch = cpuinfo_uarch_cortex_a55; break; + case 0xD06: + *uarch = cpuinfo_uarch_cortex_a65; + break; case 0xD07: *uarch = cpuinfo_uarch_cortex_a57; break; @@ -75,6 +78,22 @@ void cpuinfo_arm_decode_vendor_uarch( case 0xD0B: *uarch = cpuinfo_uarch_cortex_a76; break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD0C: + *uarch = cpuinfo_uarch_neoverse_n1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ + case 0xD0D: + *uarch = cpuinfo_uarch_cortex_a77; + break; + case 0xD0E: + *uarch = cpuinfo_uarch_cortex_a76ae; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD4A: + *uarch = cpuinfo_uarch_neoverse_e1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ default: switch (midr_get_part(midr) >> 8) { #if CPUINFO_ARCH_ARM @@ -242,10 +261,14 @@ void cpuinfo_arm_decode_vendor_uarch( *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a55; break; - case 0x804: + case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ *vendor = cpuinfo_vendor_arm; *uarch = cpuinfo_uarch_cortex_a76; break; + case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a55; + break; #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) case 0xC00: *uarch = cpuinfo_uarch_falkor; @@ -263,27 +286,43 @@ void cpuinfo_arm_decode_vendor_uarch( switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case 0x00100010: /* - * Exynos 8890 MIDR = 0x531F0011, assume Mongoose M1 has: + * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has: * - CPU variant 0x1 * - CPU part 0x001 */ - *uarch = cpuinfo_uarch_mongoose_m1; + *uarch = cpuinfo_uarch_exynos_m1; break; case 0x00400010: /* - * Exynos 8895 MIDR = 0x534F0010, assume Mongoose M2 has: + * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has: * - CPU variant 0x4 * - CPU part 0x001 */ - *uarch = cpuinfo_uarch_mongoose_m2; + *uarch = cpuinfo_uarch_exynos_m2; break; case 
0x00100020: /* - * Exynos 9810 MIDR = 0x531F0020, assume Meerkat M3 has: + * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has: * - CPU variant 0x1 * - CPU part 0x002 */ - *uarch = cpuinfo_uarch_meerkat_m3; + *uarch = cpuinfo_uarch_exynos_m3; + break; + case 0x00100030: + /* + * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has: + * - CPU variant 0x1 + * - CPU part 0x003 + */ + *uarch = cpuinfo_uarch_exynos_m4; + break; + case 0x00100040: + /* + * Exynos 9830 MIDR = 0x531F0040, assume Exynos M5 has: + * - CPU variant 0x1 + * - CPU part 0x004 + */ + *uarch = cpuinfo_uarch_exynos_m5; break; default: cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored", diff --git src/cache.c src/cache.c new file mode 100644 index 0000000..b976b87 --- /dev/null +++ src/cache.c @@ -0,0 +1,18 @@ +#include + +#include +#include + + +uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) { + if (processor->cache.l4 != NULL) { + return processor->cache.l4->size; + } else if (processor->cache.l3 != NULL) { + return processor->cache.l3->size; + } else if (processor->cache.l2 != NULL) { + return processor->cache.l2->size; + } else if (processor->cache.l1d != NULL) { + return processor->cache.l1d->size; + } + return 0; +} diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h index 6045750..717b810 100644 --- src/cpuinfo/internal-api.h +++ src/cpuinfo/internal-api.h @@ -31,6 +31,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; +extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); @@ -40,4 +41,6 @@ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); CPUINFO_PRIVATE
void cpuinfo_arm_linux_init(void); +CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor); + typedef void (*cpuinfo_processor_callback)(uint32_t); diff --git src/x86/isa.c src/x86/isa.c index bca1ecd..d27dbca 100644 --- src/x86/isa.c +++ src/x86/isa.c @@ -42,8 +42,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( { struct cpuinfo_x86_isa isa = { 0 }; - const struct cpuid_regs structured_feature_info = + const struct cpuid_regs structured_feature_info0 = (max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0}; + const struct cpuid_regs structured_feature_info1 = + (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0}; const uint32_t processor_capacity_info_index = UINT32_C(0x80000008); const struct cpuid_regs processor_capacity_info = @@ -144,9 +146,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * CLFLUSHOPT instruction: - * - Intel: ebx[bit 23] in structured feature info. + * - Intel: ebx[bit 23] in structured feature info (ecx = 0). */ - isa.clflushopt = !!(structured_feature_info.ebx & UINT32_C(0x00800000)); + isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000)); /* * MWAIT/MONITOR instructions: @@ -273,9 +275,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * PREFETCHWT1 instruction: - * - Intel: ecx[bit 0] of structured feature info. Reserved bit on AMD. + * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD. */ - isa.prefetchwt1 = !!(structured_feature_info.ecx & UINT32_C(0x00000001)); + isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001)); #if CPUINFO_ARCH_X86 /* @@ -386,111 +388,123 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * AVX2 instructions: - * - Intel: ebx[bit 5] in structured feature info. + * - Intel: ebx[bit 5] in structured feature info (ecx = 0). 
*/ - isa.avx2 = avx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00000020)); + isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020)); /* * AVX512F instructions: - * - Intel: ebx[bit 16] in structured feature info. + * - Intel: ebx[bit 16] in structured feature info (ecx = 0). */ - isa.avx512f = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00010000)); + isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000)); /* * AVX512PF instructions: - * - Intel: ebx[bit 26] in structured feature info. + * - Intel: ebx[bit 26] in structured feature info (ecx = 0). */ - isa.avx512pf = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x04000000)); + isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000)); /* * AVX512ER instructions: - * - Intel: ebx[bit 27] in structured feature info. + * - Intel: ebx[bit 27] in structured feature info (ecx = 0). */ - isa.avx512er = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x08000000)); + isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000)); /* * AVX512CD instructions: - * - Intel: ebx[bit 28] in structured feature info. + * - Intel: ebx[bit 28] in structured feature info (ecx = 0). */ - isa.avx512cd = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x10000000)); + isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000)); /* * AVX512DQ instructions: - * - Intel: ebx[bit 17] in structured feature info. + * - Intel: ebx[bit 17] in structured feature info (ecx = 0). */ - isa.avx512dq = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00020000)); + isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000)); /* * AVX512BW instructions: - * - Intel: ebx[bit 30] in structured feature info. + * - Intel: ebx[bit 30] in structured feature info (ecx = 0). 
*/ - isa.avx512bw = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x40000000)); + isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000)); /* * AVX512VL instructions: - * - Intel: ebx[bit 31] in structured feature info. + * - Intel: ebx[bit 31] in structured feature info (ecx = 0). */ - isa.avx512vl = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x80000000)); + isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000)); /* * AVX512IFMA instructions: - * - Intel: ebx[bit 21] in structured feature info. + * - Intel: ebx[bit 21] in structured feature info (ecx = 0). */ - isa.avx512ifma = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00200000)); + isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000)); /* * AVX512VBMI instructions: - * - Intel: ecx[bit 1] in structured feature info. + * - Intel: ecx[bit 1] in structured feature info (ecx = 0). */ - isa.avx512vbmi = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000002)); + isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002)); /* * AVX512VBMI2 instructions: - * - Intel: ecx[bit 6] in structured feature info. + * - Intel: ecx[bit 6] in structured feature info (ecx = 0). */ - isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000040)); + isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040)); /* * AVX512BITALG instructions: - * - Intel: ecx[bit 12] in structured feature info. + * - Intel: ecx[bit 12] in structured feature info (ecx = 0). */ - isa.avx512bitalg = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00001000)); + isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000)); /* * AVX512VPOPCNTDQ instructions: - * - Intel: ecx[bit 14] in structured feature info. + * - Intel: ecx[bit 14] in structured feature info (ecx = 0). 
*/ - isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00004000)); + isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000)); /* * AVX512VNNI instructions: - * - Intel: ecx[bit 11] in structured feature info. + * - Intel: ecx[bit 11] in structured feature info (ecx = 0). */ - isa.avx512vnni = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000800)); + isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800)); /* * AVX512_4VNNIW instructions: - * - Intel: edx[bit 2] in structured feature info. + * - Intel: edx[bit 2] in structured feature info (ecx = 0). */ - isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000004)); + isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004)); /* * AVX512_4FMAPS instructions: - * - Intel: edx[bit 3] in structured feature info. + * - Intel: edx[bit 3] in structured feature info (ecx = 0). */ - isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000008)); + isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008)); + + /* + * AVX512_VP2INTERSECT instructions: + * - Intel: edx[bit 8] in structured feature info (ecx = 0). + */ + isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100)); + + /* + * AVX512_BF16 instructions: + * - Intel: eax[bit 5] in structured feature info (ecx = 1). + */ + isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020)); /* * HLE instructions: - * - Intel: ebx[bit 4] in structured feature info. + * - Intel: ebx[bit 4] in structured feature info (ecx = 0). */ - isa.hle = !!(structured_feature_info.ebx & UINT32_C(0x00000010)); + isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010)); /* * RTM instructions: - * - Intel: ebx[bit 11] in structured feature info. 
+ * - Intel: ebx[bit 11] in structured feature info (ecx = 0). */ - isa.rtm = !!(structured_feature_info.ebx & UINT32_C(0x00000800)); + isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800)); /* * XTEST instruction: @@ -500,9 +514,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * MPX registers and instructions: - * - Intel: ebx[bit 14] in structured feature info. + * - Intel: ebx[bit 14] in structured feature info (ecx = 0). */ - isa.mpx = mpx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00004000)); + isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000)); #if CPUINFO_ARCH_X86 /* @@ -528,9 +542,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * CLWB instruction: - * - Intel: ebx[bit 24] in structured feature info. + * - Intel: ebx[bit 24] in structured feature info (ecx = 0). */ - isa.clwb = !!(structured_feature_info.ebx & UINT32_C(0x01000000)); + isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000)); /* * MOVBE instruction: @@ -549,9 +563,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions. - * - Intel: ebx[bit 0] in structured feature info. + * - Intel: ebx[bit 0] in structured feature info (ecx = 0). */ - isa.fs_gs_base = !!(structured_feature_info.ebx & UINT32_C(0x00000001)); + isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001)); /* * LZCNT instruction: @@ -573,21 +587,21 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * BMI instructions: - * - Intel, AMD: ebx[bit 3] in structured feature info. + * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0). */ - isa.bmi = !!(structured_feature_info.ebx & UINT32_C(0x00000008)); + isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008)); /* * BMI2 instructions: - * - Intel: ebx[bit 8] in structured feature info. + * - Intel: ebx[bit 8] in structured feature info (ecx = 0). 
*/ - isa.bmi2 = !!(structured_feature_info.ebx & UINT32_C(0x00000100)); + isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100)); /* * ADCX/ADOX instructions: - * - Intel: ebx[bit 19] in structured feature info. + * - Intel: ebx[bit 19] in structured feature info (ecx = 0). */ - isa.adx = !!(structured_feature_info.ebx & UINT32_C(0x00080000)); + isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000)); /* * AES instructions: @@ -597,9 +611,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * VAES instructions: - * - Intel: ecx[bit 9] in structured feature info. + * - Intel: ecx[bit 9] in structured feature info (ecx = 0). */ - isa.vaes = !!(structured_feature_info.ecx & UINT32_C(0x00000200)); + isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200)); /* * PCLMULQDQ instruction: @@ -609,15 +623,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * VPCLMULQDQ instruction: - * - Intel: ecx[bit 10] in structured feature info. + * - Intel: ecx[bit 10] in structured feature info (ecx = 0). */ - isa.vpclmulqdq = !!(structured_feature_info.ecx & UINT32_C(0x00000400)); + isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400)); /* * GFNI instructions: - * - Intel: ecx[bit 8] in structured feature info. + * - Intel: ecx[bit 8] in structured feature info (ecx = 0). */ - isa.gfni = !!(structured_feature_info.ecx & UINT32_C(0x00000100)); + isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100)); /* * RDRAND instruction: @@ -627,15 +641,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * RDSEED instruction: - * - Intel: ebx[bit 18] in structured feature info. + * - Intel: ebx[bit 18] in structured feature info (ecx = 0). */ - isa.rdseed = !!(structured_feature_info.ebx & UINT32_C(0x00040000)); + isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000)); /* * SHA instructions: - * - Intel: ebx[bit 29] in structured feature info. + * - Intel: ebx[bit 29] in structured feature info (ecx = 0). 
*/ - isa.sha = !!(structured_feature_info.ebx & UINT32_C(0x20000000)); + isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000)); if (vendor == cpuinfo_vendor_via) { const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000)); @@ -700,9 +714,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * RDPID instruction: - * - Intel: ecx[bit 22] in structured feature info. + * - Intel: ecx[bit 22] in structured feature info (ecx = 0). */ - isa.rdpid = !!(structured_feature_info.ecx & UINT32_C(0x00400000)); + isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000)); return isa; } diff --git src/x86/linux/init.c src/x86/linux/init.c index b5f74d0..c096336 100644 --- src/x86/linux/init.c +++ src/x86/linux/init.c @@ -592,6 +592,8 @@ void cpuinfo_x86_linux_init(void) { cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + __sync_synchronize(); cpuinfo_is_initialized = true; diff --git src/x86/mach/init.c src/x86/mach/init.c index 7b41ad0..ae2be33 100644 --- src/x86/mach/init.c +++ src/x86/mach/init.c @@ -327,6 +327,8 @@ void cpuinfo_x86_mach_init(void) { cpuinfo_clusters_count = mach_topology.packages; cpuinfo_packages_count = mach_topology.packages; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + __sync_synchronize(); cpuinfo_is_initialized = true; diff --git src/x86/uarch.c src/x86/uarch.c index 71c899e..ba72d8a 100644 --- src/x86/uarch.c +++ src/x86/uarch.c @@ -74,13 +74,19 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x4F: // Broadwell-E case 0x56: // Broadwell-DE return cpuinfo_uarch_broadwell; - case 0x4E: // Skylake-U/Y - case 0x55: // Skylake Server (SKX) - case 0x5E: // Skylake-H/S + case 0x4E: // Sky Lake Client Y/U + case 0x55: // Sky/Cascade/Cooper Lake Server + case 0x5E: // Sky Lake Client DT/H/S + case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U + case 0x9E: // 
Kaby/Coffee Lake DT/H/S return cpuinfo_uarch_sky_lake; - case 0x8E: // Kaby Lake U/Y - case 0x9E: // Kaby Lake H/S - return cpuinfo_uarch_kaby_lake; + case 0x66: // Cannon Lake (Core i3-8121U) + return cpuinfo_uarch_palm_cove; + case 0x6A: // Ice Lake-DE + case 0x6C: // Ice Lake-SP + case 0x7D: // Ice Lake-Y + case 0x7E: // Ice Lake-U + return cpuinfo_uarch_sunny_cove; /* Low-power cores */ case 0x1C: // Diamondville, Silverthorne, Pineview @@ -90,18 +96,20 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x35: // Cloverview case 0x36: // Cedarview, Centerton return cpuinfo_uarch_saltwell; - case 0x37: - case 0x4A: - case 0x4D: + case 0x37: // Bay Trail + case 0x4A: // Merrifield + case 0x4D: // Avoton, Rangeley case 0x5A: // Moorefield case 0x5D: // SoFIA return cpuinfo_uarch_silvermont; - case 0x4C: // Braswell - case 0x5F: // Denverton + case 0x4C: // Braswell, Cherry Trail case 0x75: // Spreadtrum SC9853I-IA - case 0x7A: // Goldmont+ return cpuinfo_uarch_airmont; - + case 0x5C: // Apollo Lake + case 0x5F: // Denverton + return cpuinfo_uarch_goldmont; + case 0x7A: // Gemini Lake + return cpuinfo_uarch_goldmont_plus; /* Knights-series cores */ case 0x57: return cpuinfo_uarch_knights_landing; @@ -190,7 +198,15 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( return cpuinfo_uarch_jaguar; } case 0x17: - return cpuinfo_uarch_zen; + switch (model_info->model) { + case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl + case 0x08: // 12 nm Pinnacle Ridge + case 0x11: // 14 nm Raven Ridge + case 0x18: // 12 nm Picasso + return cpuinfo_uarch_zen; + case 0x71: // Matisse + return cpuinfo_uarch_zen2; + } } break; default: diff --git src/x86/windows/init.c src/x86/windows/init.c index eb3498a..7a2090e 100644 --- src/x86/windows/init.c +++ src/x86/windows/init.c @@ -571,6 +571,8 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV cpuinfo_clusters_count = packages_count; cpuinfo_packages_count = packages_count; + 
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + MemoryBarrier(); cpuinfo_is_initialized = true; diff --git tools/cache-info.c tools/cache-info.c index ba0706f..05f69ee 100644 --- tools/cache-info.c +++ tools/cache-info.c @@ -60,6 +60,8 @@ int main(int argc, char** argv) { fprintf(stderr, "failed to initialize CPU information\n"); exit(EXIT_FAILURE); } + printf("Max cache size (upper bound): %"PRIu32" bytes\n", cpuinfo_get_max_cache_size()); + if (cpuinfo_get_l1i_caches_count() != 0 && (cpuinfo_get_l1i_cache(0)->flags & CPUINFO_CACHE_UNIFIED) == 0) { report_cache(cpuinfo_get_l1i_caches_count(), cpuinfo_get_l1i_cache(0), 1, "instruction"); } diff --git tools/cpu-info.c tools/cpu-info.c index caef424..7fa5187 100644 --- tools/cpu-info.c +++ tools/cpu-info.c @@ -73,8 +73,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Broadwell"; case cpuinfo_uarch_sky_lake: return "Sky Lake"; - case cpuinfo_uarch_kaby_lake: - return "Kaby Lake"; + case cpuinfo_uarch_palm_cove: + return "Palm Cove"; + case cpuinfo_uarch_sunny_cove: + return "Sunny Cove"; case cpuinfo_uarch_willamette: return "Willamette"; case cpuinfo_uarch_prescott: @@ -87,6 +89,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Silvermont"; case cpuinfo_uarch_airmont: return "Airmont"; + case cpuinfo_uarch_goldmont: + return "Goldmont"; + case cpuinfo_uarch_goldmont_plus: + return "Goldmont Plus"; case cpuinfo_uarch_knights_ferry: return "Knights Ferry"; case cpuinfo_uarch_knights_corner: @@ -117,6 +123,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Excavator"; case cpuinfo_uarch_zen: return "Zen"; + case cpuinfo_uarch_zen2: + return "Zen 2"; case cpuinfo_uarch_geode: return "Geode"; case cpuinfo_uarch_bobcat: @@ -157,6 +165,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Cortex-A55"; case cpuinfo_uarch_cortex_a57: return "Cortex-A57"; + case cpuinfo_uarch_cortex_a65: + return 
"Cortex-A65"; case cpuinfo_uarch_cortex_a72: return "Cortex-A72"; case cpuinfo_uarch_cortex_a73: @@ -165,6 +175,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Cortex-A75"; case cpuinfo_uarch_cortex_a76: return "Cortex-A76"; + case cpuinfo_uarch_cortex_a76ae: + return "Cortex-A76AE"; + case cpuinfo_uarch_cortex_a77: + return "Cortex-A77"; case cpuinfo_uarch_scorpion: return "Scorpion"; case cpuinfo_uarch_krait: @@ -181,12 +195,16 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Denver 2"; case cpuinfo_uarch_carmel: return "Carmel"; - case cpuinfo_uarch_mongoose_m1: - return "Mongoose M1"; - case cpuinfo_uarch_mongoose_m2: - return "Mongoose M2"; - case cpuinfo_uarch_meerkat_m3: - return "Meerkat M3"; + case cpuinfo_uarch_exynos_m1: + return "Exynos M1"; + case cpuinfo_uarch_exynos_m2: + return "Exynos M2"; + case cpuinfo_uarch_exynos_m3: + return "Exynos M3"; + case cpuinfo_uarch_exynos_m4: + return "Exynos M4"; + case cpuinfo_uarch_exynos_m5: + return "Exynos M5"; case cpuinfo_uarch_swift: return "Swift"; case cpuinfo_uarch_cyclone: @@ -258,13 +276,23 @@ int main(int argc, char** argv) { printf(", %s %s\n", vendor_string, uarch_string); } } - printf("Logical processors:\n"); + printf("Logical processors"); + #if defined(__linux__) + printf(" (System ID)"); + #endif + printf(":\n"); for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) { const struct cpuinfo_processor* processor = cpuinfo_get_processor(i); + printf("\t%"PRIu32"", i); + + #if defined(__linux__) + printf(" (%"PRId32")", processor->linux_id); + #endif + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 - printf("\t%"PRIu32": APIC ID 0x%08"PRIx32"\n", i, processor->apic_id); + printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id); #else - printf("\t%"PRIu32"\n", i); + printf("\n"); #endif } } diff --git tools/isa-info.c tools/isa-info.c index 594c46a..98ef919 100644 --- tools/isa-info.c +++ tools/isa-info.c @@ -67,6 +67,8 @@ int main(int argc, 
char** argv) { printf("\tAVX512BITALG: %s\n", cpuinfo_has_x86_avx512bitalg() ? "yes" : "no"); printf("\tAVX512VPOPCNTDQ: %s\n", cpuinfo_has_x86_avx512vpopcntdq() ? "yes" : "no"); printf("\tAVX512VNNI: %s\n", cpuinfo_has_x86_avx512vnni() ? "yes" : "no"); + printf("\tAVX512BF16: %s\n", cpuinfo_has_x86_avx512bf16() ? "yes" : "no"); + printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no"); printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no"); printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");