1 // Copyright 2006 Google Inc. All Rights Reserved. 2 // Author: nsanders, menderico 3 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 8 // http://www.apache.org/licenses/LICENSE-2.0 9 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef STRESSAPPTEST_OS_H_ // NOLINT 17 #define STRESSAPPTEST_OS_H_ 18 19 #include <dirent.h> 20 #include <unistd.h> 21 #include <sys/syscall.h> 22 23 #include <string> 24 #include <list> 25 #include <map> 26 #include <vector> 27 28 // This file must work with autoconf on its public version, 29 // so these includes are correct. 30 #include "adler32memcpy.h" // NOLINT 31 #include "sattypes.h" // NOLINT 32 #include "clock.h" // NOLINT 33 34 const char kPagemapPath[] = "/proc/self/pagemap"; 35 36 struct PCIDevice { 37 int32 domain; 38 uint16 bus; 39 uint8 dev; 40 uint8 func; 41 uint16 vendor_id; 42 uint16 device_id; 43 uint64 base_addr[6]; 44 uint64 size[6]; 45 }; 46 47 typedef vector<PCIDevice*> PCIDevices; 48 49 class ErrorDiag; 50 51 class Clock; 52 53 // This class implements OS/Platform specific funtions. 54 class OsLayer { 55 public: 56 OsLayer(); 57 virtual ~OsLayer(); 58 59 // Set the minimum amount of hugepages that should be available for testing. 60 // Must be set before Initialize(). SetMinimumHugepagesSize(int64 min_bytes)61 void SetMinimumHugepagesSize(int64 min_bytes) { 62 min_hugepages_bytes_ = min_bytes; 63 } 64 65 // Set the minium amount of memory that should not be allocated. This only 66 // has any affect if hugepages are not used. 67 // Must be set before Initialize(). SetReserveSize(int64 reserve_mb)68 void SetReserveSize(int64 reserve_mb) { 69 reserve_mb_ = reserve_mb; 70 } 71 72 // Set parameters needed to translate physical address to memory module. SetDramMappingParams(uintptr_t channel_hash,int channel_width,vector<vector<string>> * channels)73 void SetDramMappingParams(uintptr_t channel_hash, int channel_width, 74 vector< vector<string> > *channels) { 75 channel_hash_ = channel_hash; 76 channel_width_ = channel_width; 77 channels_ = channels; 78 } 79 80 // Initializes data strctures and open files. 81 // Returns false on error. 82 virtual bool Initialize(); 83 84 // Virtual to physical. This implementation is optional for 85 // subclasses to implement. 86 // Takes a pointer, and returns the corresponding bus address. 87 virtual uint64 VirtualToPhysical(void *vaddr); 88 89 // Prints failed dimm. This implementation is optional for 90 // subclasses to implement. 91 // Takes a bus address and string, and prints the DIMM name 92 // into the string. Returns the DIMM number that corresponds to the 93 // address given, or -1 if unable to identify the DIMM number. 94 // Note that subclass implementations of FindDimm() MUST fill 95 // buf with at LEAST one non-whitespace character (provided len > 0). 96 virtual int FindDimm(uint64 addr, char *buf, int len); 97 98 // Classifies addresses according to "regions" 99 // This may mean different things on different platforms. 100 virtual int32 FindRegion(uint64 paddr); 101 // Find cpu cores associated with a region. Either NUMA or arbitrary. 102 virtual cpu_set_t *FindCoreMask(int32 region); 103 // Return cpu cores associated with a region in a hex string. 104 virtual string FindCoreMaskFormat(int32 region); 105 106 // Returns the HD device that contains this file. 107 virtual string FindFileDevice(string filename); 108 109 // Returns a list of paths coresponding to HD devices found on this machine. 110 virtual list<string> FindFileDevices(); 111 112 // Polls for errors. This implementation is optional. 113 // This will poll once for errors and return zero iff no errors were found. 114 virtual int ErrorPoll(); 115 116 // Delay an appropriate amount of time between polling. 117 virtual void ErrorWait(); 118 119 // Report errors. This implementation is mandatory. 120 // This will output a machine readable line regarding the error. 121 virtual bool ErrorReport(const char *part, const char *symptom, int count); 122 123 // Flushes page cache. Used to circumvent the page cache when doing disk 124 // I/O. This will be a NOP until ActivateFlushPageCache() is called, which 125 // is typically done when opening a file with O_DIRECT fails. 126 // Returns false on error, true on success or NOP. 127 // Subclasses may implement this in machine specific ways.. 128 virtual bool FlushPageCache(void); 129 // Enable FlushPageCache() to actually do the flush instead of being a NOP. 130 virtual void ActivateFlushPageCache(void); 131 132 // Flushes cacheline. Used to distinguish read or write errors. 133 // Subclasses may implement this in machine specific ways.. 134 // Takes a pointer, and flushed the cacheline containing that pointer. 135 virtual void Flush(void *vaddr); 136 137 // Fast flush, for use in performance critical code. 138 // This is bound at compile time, and will not pick up 139 // any runtime machine configuration info. FastFlush(void * vaddr)140 inline static void FastFlush(void *vaddr) { 141 #ifdef STRESSAPPTEST_CPU_PPC 142 asm volatile("dcbf 0,%0; sync" : : "r" (vaddr)); 143 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 144 // Put mfence before and after clflush to make sure: 145 // 1. The write before the clflush is committed to memory bus; 146 // 2. The read after the clflush is hitting the memory bus. 147 // 148 // From Intel manual: 149 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 150 // to be ordered by any other fencing, serializing or other CLFLUSH 151 // instruction. For example, software can use an MFENCE instruction to 152 // insure that previous stores are included in the write-back. 153 asm volatile("mfence"); 154 asm volatile("clflush (%0)" : : "r" (vaddr)); 155 asm volatile("mfence"); 156 #elif defined(STRESSAPPTEST_CPU_ARMV7A) 157 // ARMv7a cachelines are 8 words (32 bytes). 158 syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0); 159 #elif defined(STRESSAPPTEST_CPU_AARCH64) 160 asm volatile("dc cvau, %0" : : "r" (vaddr)); 161 asm volatile("dsb ish"); 162 asm volatile("ic ivau, %0" : : "r" (vaddr)); 163 asm volatile("dsb ish"); 164 asm volatile("isb"); 165 #else 166 #warning "Unsupported CPU type: Unable to force cache flushes." 167 #endif 168 } 169 170 // Fast flush, for use in performance critical code. 171 // This is bound at compile time, and will not pick up 172 // any runtime machine configuration info. Takes a NULL-terminated 173 // array of addresses to flush. FastFlushList(void ** vaddrs)174 inline static void FastFlushList(void **vaddrs) { 175 #ifdef STRESSAPPTEST_CPU_PPC 176 while (*vaddrs) { 177 asm volatile("dcbf 0,%0" : : "r" (*vaddrs++)); 178 } 179 asm volatile("sync"); 180 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 181 // Put mfence before and after clflush to make sure: 182 // 1. The write before the clflush is committed to memory bus; 183 // 2. The read after the clflush is hitting the memory bus. 184 // 185 // From Intel manual: 186 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 187 // to be ordered by any other fencing, serializing or other CLFLUSH 188 // instruction. For example, software can use an MFENCE instruction to 189 // insure that previous stores are included in the write-back. 190 asm volatile("mfence"); 191 while (*vaddrs) { 192 asm volatile("clflush (%0)" : : "r" (*vaddrs++)); 193 } 194 asm volatile("mfence"); 195 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 196 while (*vaddrs) { 197 FastFlush(*vaddrs++); 198 } 199 #else 200 #warning "Unsupported CPU type: Unable to force cache flushes." 201 #endif 202 } 203 204 // Fast flush hint, for use in performance critical code. 205 // This is bound at compile time, and will not pick up 206 // any runtime machine configuration info. Note that this 207 // will not guarantee that a flush happens, but will at least 208 // hint that it should. This is useful for speeding up 209 // parallel march algorithms. FastFlushHint(void * vaddr)210 inline static void FastFlushHint(void *vaddr) { 211 #ifdef STRESSAPPTEST_CPU_PPC 212 asm volatile("dcbf 0,%0" : : "r" (vaddr)); 213 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 214 // From Intel manual: 215 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 216 // to be ordered by any other fencing, serializing or other CLFLUSH 217 // instruction. For example, software can use an MFENCE instruction to 218 // insure that previous stores are included in the write-back. 219 asm volatile("clflush (%0)" : : "r" (vaddr)); 220 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 221 FastFlush(vaddr); 222 #else 223 #warning "Unsupported CPU type: Unable to force cache flushes." 224 #endif 225 } 226 227 // Fast flush, for use in performance critical code. 228 // This is bound at compile time, and will not pick up 229 // any runtime machine configuration info. Sync's any 230 // transactions for ordering FastFlushHints. FastFlushSync()231 inline static void FastFlushSync() { 232 #ifdef STRESSAPPTEST_CPU_PPC 233 asm volatile("sync"); 234 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 235 // Put mfence before and after clflush to make sure: 236 // 1. The write before the clflush is committed to memory bus; 237 // 2. The read after the clflush is hitting the memory bus. 238 // 239 // From Intel manual: 240 // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed 241 // to be ordered by any other fencing, serializing or other CLFLUSH 242 // instruction. For example, software can use an MFENCE instruction to 243 // insure that previous stores are included in the write-back. 244 asm volatile("mfence"); 245 #elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) 246 // This is a NOP, FastFlushHint() always does a full flush, so there's 247 // nothing to do for FastFlushSync(). 248 #else 249 #warning "Unsupported CPU type: Unable to force cache flushes." 250 #endif 251 } 252 253 // Get time in cpu timer ticks. Useful for matching MCEs with software 254 // actions. GetTimestamp(void)255 inline static uint64 GetTimestamp(void) { 256 uint64 tsc; 257 #ifdef STRESSAPPTEST_CPU_PPC 258 uint32 tbl, tbu, temp; 259 __asm __volatile( 260 "1:\n" 261 "mftbu %2\n" 262 "mftb %0\n" 263 "mftbu %1\n" 264 "cmpw %2,%1\n" 265 "bne 1b\n" 266 : "=r"(tbl), "=r"(tbu), "=r"(temp) 267 : 268 : "cc"); 269 270 tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl); 271 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 272 datacast_t data; 273 __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); 274 tsc = data.l64; 275 #elif defined(STRESSAPPTEST_CPU_ARMV7A) 276 #warning "Unsupported CPU type ARMV7A: your timer may not function correctly" 277 tsc = 0; 278 #elif defined(STRESSAPPTEST_CPU_AARCH64) 279 __asm __volatile("mrs %0, CNTVCT_EL0" : "=r" (tsc) : : ); 280 #else 281 #warning "Unsupported CPU type: your timer may not function correctly" 282 tsc = 0; 283 #endif 284 return (tsc); 285 } 286 287 // Find the free memory on the machine. 288 virtual int64 FindFreeMemSize(); 289 290 // Allocates test memory of length bytes. 291 // Subclasses must implement this. 292 // Call PepareTestMem to get a pointer. 293 virtual int64 AllocateAllMem(); // Returns length. 294 // Returns success. 295 virtual bool AllocateTestMem(int64 length, uint64 paddr_base); 296 virtual void FreeTestMem(); 297 298 // Prepares the memory for use. You must call this 299 // before using test memory, and after you are done. 300 virtual void *PrepareTestMem(uint64 offset, uint64 length); 301 virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length); 302 303 // Machine type detected. Can we implement all these functions correctly? 304 // Returns true if machine type is detected and implemented. 305 virtual bool IsSupported(); 306 307 // Returns 32 for 32-bit, 64 for 64-bit. 308 virtual int AddressMode(); 309 // Update OsLayer state regarding cpu support for various features. 310 virtual void GetFeatures(); 311 312 // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file. 313 virtual int PciOpen(int bus, int device, int function); 314 virtual void PciWrite(int fd, uint32 offset, uint32 value, int width); 315 virtual uint32 PciRead(int fd, uint32 offset, int width); 316 317 // Read MSRs 318 virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data); 319 virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data); 320 321 // Extract bits [n+len-1, n] from a 32 bit word. 322 // so GetBitField(0x0f00, 8, 4) == 0xf. 323 virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len); 324 325 // Platform and CPU specific CPU-stressing function. 326 // Returns true on success, false otherwise. 327 virtual bool CpuStressWorkload(); 328 329 // Causes false errors for unittesting. 330 // Setting to "true" causes errors to be injected. set_error_injection(bool errors)331 void set_error_injection(bool errors) { error_injection_ = errors; } error_injection()332 bool error_injection() const { return error_injection_; } 333 334 // Is SAT using normal malloc'd memory, or exotic mmap'd memory. normal_mem()335 bool normal_mem() const { return normal_mem_; } 336 337 // Get numa config, if available.. num_nodes()338 int num_nodes() const { return num_nodes_; } num_cpus()339 int num_cpus() const { return num_cpus_; } 340 341 // Handle to platform-specific error diagnoser. 342 ErrorDiag *error_diagnoser_; 343 344 // Disambiguate between different "warm" memcopies. 345 virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, 346 unsigned int size_in_bytes, 347 AdlerChecksum *checksum); 348 349 // Store a callback to use to print 350 // app-specific info about the last error location. 351 // This call back is called with a physical address, and the app can fill in 352 // the most recent transaction that occurred at that address. 353 typedef bool (*ErrCallback)(uint64 paddr, string *buf); set_err_log_callback(ErrCallback err_log_callback)354 void set_err_log_callback( 355 ErrCallback err_log_callback) { 356 err_log_callback_ = err_log_callback; 357 } get_err_log_callback()358 ErrCallback get_err_log_callback() { return err_log_callback_; } 359 360 // Set a clock object that can be overridden for use with unit tests. SetClock(Clock * clock)361 void SetClock(Clock *clock) { 362 if (clock_) { 363 delete clock_; 364 } 365 clock_ = clock; 366 time_initialized_ = clock_->Now(); 367 } 368 369 protected: 370 void *testmem_; // Location of test memory. 371 uint64 testmemsize_; // Size of test memory. 372 int64 totalmemsize_; // Size of available memory. 373 int64 min_hugepages_bytes_; // Minimum hugepages size. 374 int64 reserve_mb_; // Minimum amount of memory to reserve in MB. 375 bool error_injection_; // Do error injection? 376 bool normal_mem_; // Memory DMA capable? 377 bool use_hugepages_; // Use hugepage shmem? 378 bool use_posix_shm_; // Use 4k page shmem? 379 bool dynamic_mapped_shmem_; // Conserve virtual address space. 380 bool mmapped_allocation_; // Was memory allocated using mmap()? 381 int shmid_; // Handle to shmem 382 vector< vector<string> > *channels_; // Memory module names per channel. 383 uint64 channel_hash_; // Mask of address bits XORed for channel. 384 int channel_width_; // Channel width in bits. 385 386 int64 regionsize_; // Size of memory "regions" 387 int regioncount_; // Number of memory "regions" 388 int num_cpus_; // Number of cpus in the system. 389 int num_nodes_; // Number of nodes in the system. 390 int num_cpus_per_node_; // Number of cpus per node in the system. 391 int address_mode_; // Are we running 32 or 64 bit? 392 bool has_vector_; // Do we have sse2/neon instructions? 393 bool has_clflush_; // Do we have clflush instructions? 394 bool use_flush_page_cache_; // Do we need to flush the page cache? 395 396 397 time_t time_initialized_; // Start time of test. 398 399 vector<cpu_set_t> cpu_sets_; // Cache for cpu masks. 400 vector<bool> cpu_sets_valid_; // If the cpu mask cache is valid. 401 402 // Get file descriptor for dev msr. 403 virtual int OpenMSR(uint32 core, uint32 address); 404 405 // Look up how many hugepages there are. 406 virtual int64 FindHugePages(); 407 408 // Link to find last transaction at an error location. 409 ErrCallback err_log_callback_; 410 411 // Object to wrap the time function. 412 Clock *clock_; 413 414 private: 415 DISALLOW_COPY_AND_ASSIGN(OsLayer); 416 }; 417 418 // Selects and returns the proper OS and hardware interface. Does not call 419 // OsLayer::Initialize() on the new object. 420 OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options); 421 422 #endif // STRESSAPPTEST_OS_H_ NOLINT 423