1 // Copyright 2006 Google Inc. All Rights Reserved. 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 7 // http://www.apache.org/licenses/LICENSE-2.0 8 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // sat.h : sat stress test object interface and data structures 16 17 #ifndef STRESSAPPTEST_SAT_H_ 18 #define STRESSAPPTEST_SAT_H_ 19 20 #include <signal.h> 21 22 #include <map> 23 #include <string> 24 #include <vector> 25 26 // This file must work with autoconf on its public version, 27 // so these includes are correct. 28 #include "finelock_queue.h" 29 #include "queue.h" 30 #include "sattypes.h" 31 #include "worker.h" 32 #include "os.h" 33 34 // SAT stress test class. 35 class Sat { 36 public: 37 // Enum for page queue implementation switch. 38 enum PageQueueType { SAT_ONELOCK, SAT_FINELOCK }; 39 40 Sat(); 41 virtual ~Sat(); 42 43 // Read configuration from arguments. Called first. 44 bool ParseArgs(int argc, char **argv); 45 virtual bool CheckGoogleSpecificArgs(int argc, char **argv, int *i); 46 // Initialize data structures, subclasses, and resources, 47 // based on command line args. 48 // Called after ParseArgs(). 49 bool Initialize(); 50 51 // Execute the test. Initialize() and ParseArgs() must be called first. 52 // This must be called from a single-threaded program. 53 bool Run(); 54 55 // Pretty print result summary. 56 // Called after Run(). 57 // Return value is success or failure of the SAT run, *not* of this function! 58 bool PrintResults(); 59 60 // Pretty print version info. 61 bool PrintVersion(); 62 63 // Pretty print help. 64 virtual void PrintHelp(); 65 66 // Clean up allocations and resources. 67 // Called last. 68 bool Cleanup(); 69 70 // Abort Run(). Only for use by Run()-installed signal handlers. Break()71 void Break() { user_break_ = true; } 72 73 // Fetch and return empty and full pages into the empty and full pools. 74 bool GetValid(struct page_entry *pe); 75 bool PutValid(struct page_entry *pe); 76 bool GetEmpty(struct page_entry *pe); 77 bool PutEmpty(struct page_entry *pe); 78 79 bool GetValid(struct page_entry *pe, int32 tag); 80 bool GetEmpty(struct page_entry *pe, int32 tag); 81 82 // Accessor functions. verbosity()83 int verbosity() const { return verbosity_; } logfile()84 int logfile() const { return logfile_; } page_length()85 int page_length() const { return page_length_; } disk_pages()86 int disk_pages() const { return disk_pages_; } strict()87 int strict() const { return strict_; } tag_mode()88 int tag_mode() const { return tag_mode_; } status()89 int status() const { return statuscount_; } bad_status()90 void bad_status() { statuscount_++; } errors()91 int errors() const { return errorcount_; } warm()92 int warm() const { return warm_; } stop_on_error()93 bool stop_on_error() const { return stop_on_error_; } region_mask()94 int32 region_mask() const { return region_mask_; } 95 // Semi-accessor to find the "nth" region to avoid replicated bit searching.. region_find(int32 num)96 int32 region_find(int32 num) const { 97 for (int i = 0; i < 32; i++) { 98 if ((1 << i) & region_mask_) { 99 if (num == 0) 100 return i; 101 num--; 102 } 103 } 104 return 0; 105 } 106 107 // Causes false errors for unittesting. 108 // Setting to "true" causes errors to be injected. set_error_injection(bool errors)109 void set_error_injection(bool errors) { error_injection_ = errors; } error_injection()110 bool error_injection() const { return error_injection_; } 111 112 protected: 113 // Opens log file for writing. Returns 0 on failure. 114 bool InitializeLogfile(); 115 // Checks for supported environment. Returns 0 on failure. 116 bool CheckEnvironment(); 117 // Allocates size_ bytes of test memory. 118 bool AllocateMemory(); 119 // Initializes datapattern reference structures. 120 bool InitializePatterns(); 121 // Initializes test memory with datapatterns. 122 bool InitializePages(); 123 124 // Start up worker threads. 125 virtual void InitializeThreads(); 126 // Spawn worker threads. 127 void SpawnThreads(); 128 // Reap worker threads. 129 void JoinThreads(); 130 // Run bandwidth and error analysis. 131 virtual void RunAnalysis(); 132 // Delete worker threads. 133 void DeleteThreads(); 134 135 // Return the number of cpus in the system. 136 int CpuCount(); 137 138 // Collect error counts from threads. 139 int64 GetTotalErrorCount(); 140 141 // Command line arguments. 142 string cmdline_; 143 144 // Memory and test configuration. 145 int runtime_seconds_; // Seconds to run. 146 int page_length_; // Length of each memory block. 147 int64 pages_; // Number of memory blocks. 148 int64 size_; // Size of memory tested, in bytes. 149 int64 size_mb_; // Size of memory tested, in MB. 150 int64 min_hugepages_mbytes_; // Minimum hugepages size. 151 int64 freepages_; // How many invalid pages we need. 152 int disk_pages_; // Number of pages per temp file. 153 uint64 paddr_base_; // Physical address base. 154 155 // Control flags. 156 volatile sig_atomic_t user_break_; // User has signalled early exit. Used as 157 // a boolean. 158 int verbosity_; // How much to print. 159 int strict_; // Check results per transaction. 160 int warm_; // FPU warms CPU while coying. 161 int address_mode_; // 32 or 64 bit binary. 162 bool stop_on_error_; // Exit immendiately on any error. 163 bool findfiles_; // Autodetect tempfile locations. 164 165 bool error_injection_; // Simulate errors, for unittests. 166 bool crazy_error_injection_; // Simulate lots of errors. 167 uint64 max_errorcount_; // Number of errors before forced exit. 168 int run_on_anything_; // Ignore unknown machine ereor. 169 int use_logfile_; // Log to a file. 170 char logfilename_[255]; // Name of file to log to. 171 int logfile_; // File handle to log to. 172 173 // Disk thread options. 174 int read_block_size_; // Size of block to read from disk. 175 int write_block_size_; // Size of block to write to disk. 176 int64 segment_size_; // Size of segment to split disk into. 177 int cache_size_; // Size of disk cache. 178 int blocks_per_segment_; // Number of blocks to test per segment. 179 int read_threshold_; // Maximum time (in us) a read should take 180 // before warning of a slow read. 181 int write_threshold_; // Maximum time (in us) a write should 182 // take before warning of a slow write. 183 int non_destructive_; // Whether to use non-destructive mode for 184 // the disk test. 185 186 // Generic Options. 187 int monitor_mode_; // Switch for monitor-only mode SAT. 188 // This switch trumps most of the other 189 // argument, as SAT will only run error 190 // polling threads. 191 int tag_mode_; // Do tagging of memory and strict 192 // checking for misplaced cachelines. 193 194 bool do_page_map_; // Should we print a list of used pages? 195 unsigned char *page_bitmap_; // Store bitmap of physical pages seen. 196 uint64 page_bitmap_size_; // Length of physical memory represented. 197 198 // Cpu Cache Coherency Options. 199 bool cc_test_; // Flag to decide whether to start the 200 // cache coherency threads. 201 int cc_cacheline_count_; // Number of cache line size structures. 202 int cc_inc_count_; // Number of times to increment the shared 203 // cache lines structure members. 204 205 // Thread control. 206 int file_threads_; // Threads of file IO. 207 int net_threads_; // Threads of network IO. 208 int listen_threads_; // Threads for network IO to connect. 209 int memory_threads_; // Threads of memcpy. 210 int invert_threads_; // Threads of invert. 211 int fill_threads_; // Threads of memset. 212 int check_threads_; // Threads of strcmp. 213 int cpu_stress_threads_; // Threads of CPU stress workload. 214 int disk_threads_; // Threads of disk test. 215 int random_threads_; // Number of random disk threads. 216 int total_threads_; // Total threads used. 217 bool error_poll_; // Poll for system errors. 218 219 // Resources. 220 cc_cacheline_data *cc_cacheline_data_; // The cache line sized datastructure 221 // used by the ccache threads 222 // (in worker.h). 223 vector<string> filename_; // Filenames for file IO. 224 vector<string> ipaddrs_; // Addresses for network IO. 225 vector<string> diskfilename_; // Filename for disk IO device. 226 // Block table for IO device. 227 vector<DiskBlockTable*> blocktables_; 228 229 int32 region_mask_; // Bitmask of available NUMA regions. 230 int32 region_count_; // Count of available NUMA regions. 231 int32 region_[32]; // Pagecount per region. 232 int region_mode_; // What to do with NUMA hints? 233 static const int kLocalNuma = 1; // Target local memory. 234 static const int kRemoteNuma = 2; // Target remote memory. 235 236 // Results. 237 int64 errorcount_; // Total hardware incidents seen. 238 int statuscount_; // Total test errors seen. 239 240 // Thread type constants and types 241 enum ThreadType { 242 kMemoryType = 0, 243 kFileIOType = 1, 244 kNetIOType = 2, 245 kNetSlaveType = 3, 246 kCheckType = 4, 247 kInvertType = 5, 248 kDiskType = 6, 249 kRandomDiskType = 7, 250 kCPUType = 8, 251 kErrorType = 9, 252 kCCType = 10 253 }; 254 255 // Helper functions. 256 virtual void AcquireWorkerLock(); 257 virtual void ReleaseWorkerLock(); 258 pthread_mutex_t worker_lock_; // Lock access to the worker thread structure. 259 typedef vector<WorkerThread*> WorkerVector; 260 typedef map<int, WorkerVector*> WorkerMap; 261 // Contains all worker threads. 262 WorkerMap workers_map_; 263 // Delay between power spikes. 264 time_t pause_delay_; 265 // The duration of each pause (for power spikes). 266 time_t pause_duration_; 267 // For the workers we pause and resume to create power spikes. 268 WorkerStatus power_spike_status_; 269 // For the workers we never pause. 270 WorkerStatus continuous_status_; 271 272 class OsLayer *os_; // Os abstraction: put hacks here. 273 class PatternList *patternlist_; // Access to global data patterns. 274 275 // RunAnalysis methods 276 void AnalysisAllStats(); // Summary of all runs. 277 void MemoryStats(); 278 void FileStats(); 279 void NetStats(); 280 void CheckStats(); 281 void InvertStats(); 282 void DiskStats(); 283 284 void QueueStats(); 285 286 // Physical page use reporting. 287 void AddrMapInit(); 288 void AddrMapUpdate(struct page_entry *pe); 289 void AddrMapPrint(); 290 291 // additional memory data from google-specific tests. 292 virtual void GoogleMemoryStats(float *memcopy_data, 293 float *memcopy_bandwidth); 294 295 virtual void GoogleOsOptions(std::map<std::string, std::string> *options); 296 297 // Page queues, only one of (valid_+empty_) or (finelock_q_) will be used 298 // at a time. A commandline switch controls which queue implementation will 299 // be used. 300 class PageEntryQueue *valid_; // Page queue structure, valid pages. 301 class PageEntryQueue *empty_; // Page queue structure, free pages. 302 class FineLockPEQueue *finelock_q_; // Page queue with fine-grain locks 303 Sat::PageQueueType pe_q_implementation_; // Queue implementation switch 304 305 DISALLOW_COPY_AND_ASSIGN(Sat); 306 }; 307 308 Sat *SatFactory(); 309 310 #endif // STRESSAPPTEST_SAT_H_ 311