1 //===-- Benchmark memory specific tools -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // This file complements the `benchmark` header with memory specific tools and 10 // benchmarking facilities. 11 12 #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 13 #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 14 15 #include "LibcBenchmark.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Support/Alignment.h" 19 #include <cstdint> 20 #include <random> 21 22 namespace llvm { 23 namespace libc_benchmarks { 24 25 //-------------- 26 // Configuration 27 //-------------- 28 29 // Specifies a range of sizes to explore. 30 struct SizeRange { 31 uint32_t From = 0; // Inclusive 32 uint32_t To = 1024; // Inclusive 33 uint32_t Step = 1; 34 }; 35 36 // An object to define how to test a memory function. 37 struct StudyConfiguration { 38 // The number of run for the study. 39 uint32_t Runs = 1; 40 41 // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp). 42 // When testing small sizes, it's important to keep the total allocated 43 // size under the size of the L1 cache (usually 16 or 32KiB). The framework 44 // will also use 2KiB of additional L1 memory to store the function 45 // parameters. 46 uint32_t BufferSize = 8192; 47 48 // The range of sizes to exercise. 49 SizeRange Size; 50 51 MaybeAlign AddressAlignment; // Unset : Use start of buffer which is at 52 // least cache line aligned) 53 // 1 : Use random address, 54 // >1 : Use random address aligned to value. 55 56 // The value to use for memset. 57 uint8_t MemsetValue = 0; 58 59 // The mismatch position for memcmp. 60 uint32_t MemcmpMismatchAt = 0; // 0 : Buffer compare equal, 61 // >0 : Buffer compare different at byte N-1. 62 }; 63 64 //-------- 65 // Results 66 //-------- 67 68 // The time to run one iteration of the function under test for the specified 69 // Size. 70 struct Measurement { 71 uint32_t Size = 0; 72 Duration Runtime = {}; 73 }; 74 75 // The measurements for a specific function. 76 struct FunctionMeasurements { 77 std::string Name; 78 std::vector<Measurement> Measurements; 79 }; 80 81 // The root object containing all the data (configuration and measurements). 82 struct Study { 83 HostState Host; 84 BenchmarkOptions Options; 85 StudyConfiguration Configuration; 86 SmallVector<FunctionMeasurements, 4> Functions; 87 }; 88 89 // Provides an aligned, dynamically allocated buffer. 90 class AlignedBuffer { 91 char *const Buffer = nullptr; 92 size_t Size = 0; 93 94 public: 95 static constexpr size_t Alignment = 1024; 96 AlignedBuffer(size_t Size)97 explicit AlignedBuffer(size_t Size) 98 : Buffer(static_cast<char *>(aligned_alloc(1024, Size))), Size(Size) {} ~AlignedBuffer()99 ~AlignedBuffer() { free(Buffer); } 100 101 inline char *operator+(size_t Index) { return Buffer + Index; } 102 inline const char *operator+(size_t Index) const { return Buffer + Index; } 103 inline char &operator[](size_t Index) { return Buffer[Index]; } 104 inline const char &operator[](size_t Index) const { return Buffer[Index]; } begin()105 inline char *begin() { return Buffer; } end()106 inline char *end() { return Buffer + Size; } 107 }; 108 109 // Implements the ParameterProvider abstraction needed by the `benchmark` 110 // function. This implementation makes sure that all parameters will fit into 111 // `StorageSize` bytes. The total memory accessed during benchmark should be 112 // less than the data L1 cache, that is the storage for the ParameterProvider 113 // and the memory buffers. 114 template <typename Context, size_t StorageSize = 8 * 1024> 115 class SmallParameterProvider { 116 using ParameterType = typename Context::ParameterType; 117 ByteConstrainedArray<ParameterType, StorageSize> Parameters; 118 size_t LastIterations; 119 Context &Ctx; 120 121 public: SmallParameterProvider(Context & C)122 explicit SmallParameterProvider(Context &C) : Ctx(C) {} 123 SmallParameterProvider(const SmallParameterProvider &) = delete; 124 SmallParameterProvider &operator=(const SmallParameterProvider &) = delete; 125 126 // Useful to compute the histogram of the size parameter. getLastBatch()127 CircularArrayRef<ParameterType> getLastBatch() const { 128 return cycle(Parameters, LastIterations); 129 } 130 131 // Implements the interface needed by the `benchmark` function. generateBatch(size_t Iterations)132 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) { 133 LastIterations = Iterations; 134 Ctx.Randomize(Parameters); 135 return getLastBatch(); 136 } 137 }; 138 139 // Helper to generate random buffer offsets that satisfy the configuration 140 // constraints. 141 class OffsetDistribution { 142 std::uniform_int_distribution<uint32_t> Distribution; 143 uint32_t Factor; 144 145 public: 146 explicit OffsetDistribution(const StudyConfiguration &Conf); 147 operator()148 template <class Generator> uint32_t operator()(Generator &G) { 149 return Distribution(G) * Factor; 150 } 151 }; 152 153 // Helper to generate random buffer offsets that satisfy the configuration 154 // constraints. It is specifically designed to benchmark `memcmp` functions 155 // where we may want the Nth byte to differ. 156 class MismatchOffsetDistribution { 157 std::uniform_int_distribution<size_t> MismatchIndexSelector; 158 llvm::SmallVector<uint32_t, 16> MismatchIndices; 159 const uint32_t MismatchAt; 160 161 public: 162 explicit MismatchOffsetDistribution(const StudyConfiguration &Conf); 163 164 explicit operator bool() const { return !MismatchIndices.empty(); } 165 getMismatchIndices()166 const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const { 167 return MismatchIndices; 168 } 169 operator()170 template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) { 171 const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)]; 172 // We need to position the offset so that a mismatch occurs at MismatchAt. 173 if (Size >= MismatchAt) 174 return MismatchIndex - MismatchAt; 175 // Size is too small to trigger the mismatch. 176 return MismatchIndex - Size - 1; 177 } 178 }; 179 180 } // namespace libc_benchmarks 181 } // namespace llvm 182 183 #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H 184