1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7
8 // http://www.apache.org/licenses/LICENSE-2.0
9
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 // os.cc : os and machine specific implementation
17 // This file includes an abstracted interface
18 // for linux-distro specific and HW specific
19 // interfaces.
20
21 #include "os.h"
22
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <linux/types.h>
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/ioctl.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/ipc.h>
35 #ifdef HAVE_SYS_SHM_H
36 #include <sys/shm.h>
37 #endif
38 #include <unistd.h>
39
40 #ifndef SHM_HUGETLB
41 #define SHM_HUGETLB 04000 // remove when glibc defines it
42 #endif
43
44 #include <string>
45 #include <list>
46
47 // This file must work with autoconf on its public version,
48 // so these includes are correct.
49 #include "sattypes.h"
50 #include "error_diag.h"
51 #include "clock.h"
52
53 // OsLayer initialization.
OsLayer()54 OsLayer::OsLayer() {
55 testmem_ = 0;
56 testmemsize_ = 0;
57 totalmemsize_ = 0;
58 min_hugepages_bytes_ = 0;
59 reserve_mb_ = 0;
60 normal_mem_ = true;
61 use_hugepages_ = false;
62 use_posix_shm_ = false;
63 dynamic_mapped_shmem_ = false;
64 mmapped_allocation_ = false;
65 shmid_ = 0;
66
67 time_initialized_ = 0;
68
69 regionsize_ = 0;
70 regioncount_ = 1;
71 num_cpus_ = 0;
72 num_nodes_ = 0;
73 num_cpus_per_node_ = 0;
74 error_diagnoser_ = 0;
75 err_log_callback_ = 0;
76 error_injection_ = false;
77
78 void *pvoid = 0;
79 address_mode_ = sizeof(pvoid) * 8;
80
81 has_clflush_ = false;
82 has_vector_ = false;
83
84 use_flush_page_cache_ = false;
85
86 clock_ = NULL;
87 }
88
89 // OsLayer cleanup.
~OsLayer()90 OsLayer::~OsLayer() {
91 if (error_diagnoser_)
92 delete error_diagnoser_;
93 if (clock_)
94 delete clock_;
95 }
96
97 // OsLayer initialization.
bool OsLayer::Initialize() {
  // Lazily create the clock so callers (or tests) may inject one beforehand.
  if (!clock_) {
    clock_ = new Clock();
  }

  // Record the start time; ErrorReport() uses it for time-to-failure.
  time_initialized_ = clock_->Now();
  // Detect asm support.
  GetFeatures();

  // If topology was not configured externally, assume a single node
  // containing every online CPU.
  if (num_cpus_ == 0) {
    num_nodes_ = 1;
    num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
    num_cpus_per_node_ = num_cpus_ / num_nodes_;
  }
  logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
  // cpu_set_t can only represent CPU_SETSIZE CPUs; more would overflow masks.
  sat_assert(CPU_SETSIZE >= num_cpus_);
  cpu_sets_.resize(num_nodes_);
  cpu_sets_valid_.resize(num_nodes_);
  // Create error diagnoser.
  error_diagnoser_ = new ErrorDiag();
  if (!error_diagnoser_->set_os(this))
    return false;
  return true;
}
122
123 // Machine type detected. Can we implement all these functions correctly?
IsSupported()124 bool OsLayer::IsSupported() {
125 if (kOpenSource) {
126 // There are no explicitly supported systems in open source version.
127 return true;
128 }
129
130 // This is the default empty implementation.
131 // SAT won't report full error information.
132 return false;
133 }
134
AddressMode()135 int OsLayer::AddressMode() {
136 // Detect 32/64 bit binary.
137 void *pvoid = 0;
138 return sizeof(pvoid) * 8;
139 }
140
141 // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)142 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
143 uint64 frame, shift;
144 off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
145 int fd = open(kPagemapPath, O_RDONLY);
146 // /proc/self/pagemap is available in kernel >= 2.6.25
147 if (fd < 0)
148 return 0;
149
150 if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
151 int err = errno;
152 string errtxt = ErrorString(err);
153 logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
154 kPagemapPath, err, errtxt.c_str());
155 if (fd >= 0)
156 close(fd);
157 return 0;
158 }
159 close(fd);
160 if (!(frame & (1LL << 63)) || (frame & (1LL << 62)))
161 return 0;
162 shift = (frame >> 55) & 0x3f;
163 frame = (frame & 0x007fffffffffffffLL) << shift;
164 return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1));
165 }
166
167 // Returns the HD device that contains this file.
FindFileDevice(string filename)168 string OsLayer::FindFileDevice(string filename) {
169 return "hdUnknown";
170 }
171
172 // Returns a list of locations corresponding to HD devices.
FindFileDevices()173 list<string> OsLayer::FindFileDevices() {
174 // No autodetection on unknown systems.
175 list<string> locations;
176 return locations;
177 }
178
179
180 // Get HW core features from cpuid instruction.
void OsLayer::GetFeatures() {
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
  // CPUID leaf 1: EDX bit 19 reports CLFLUSH, EDX bit 26 reports SSE2.
  unsigned int eax = 1, ebx, ecx, edx;
  cpuid(&eax, &ebx, &ecx, &edx);
  has_clflush_ = (edx >> 19) & 1;
  has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.

  logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
            has_clflush_ ? "true" : "false",
            has_vector_ ? "true" : "false");
#elif defined(STRESSAPPTEST_CPU_PPC)
  // All PPC implementations have cache flush instructions.
  has_clflush_ = true;
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
  // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
  // For now assume neon and don't run -W if you don't have it.
  has_vector_ = true;  // NEON.
#warning "Unsupported CPU type ARMV7A: unable to determine feature set."
#else
#warning "Unsupported CPU type: unable to determine feature set."
#endif
}
203
204
205 // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)206 void OsLayer::ActivateFlushPageCache(void) {
207 logprintf(9, "Log: page cache will be flushed as needed\n");
208 use_flush_page_cache_ = true;
209 }
210
211 // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)212 bool OsLayer::FlushPageCache(void) {
213 if (!use_flush_page_cache_)
214 return true;
215
216 // First, ask the kernel to write the cache to the disk.
217 sync();
218
219 // Second, ask the kernel to empty the cache by writing "1" to
220 // "/proc/sys/vm/drop_caches".
221 static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
222 int dcfile = open(drop_caches_file, O_WRONLY);
223 if (dcfile < 0) {
224 int err = errno;
225 string errtxt = ErrorString(err);
226 logprintf(3, "Log: failed to open %s - err %d (%s)\n",
227 drop_caches_file, err, errtxt.c_str());
228 return false;
229 }
230
231 ssize_t bytes_written = write(dcfile, "1", 1);
232 close(dcfile);
233
234 if (bytes_written != 1) {
235 int err = errno;
236 string errtxt = ErrorString(err);
237 logprintf(3, "Log: failed to write %s - err %d (%s)\n",
238 drop_caches_file, err, errtxt.c_str());
239 return false;
240 }
241 return true;
242 }
243
244
245 // We need to flush the cacheline here.
Flush(void * vaddr)246 void OsLayer::Flush(void *vaddr) {
247 // Use the generic flush. This function is just so we can override
248 // this if we are so inclined.
249 if (has_clflush_) {
250 OsLayer::FastFlush(vaddr);
251 }
252 }
253
254
255 // Run C or ASM copy as appropriate..
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)256 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
257 unsigned int size_in_bytes,
258 AdlerChecksum *checksum) {
259 if (has_vector_) {
260 return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
261 } else {
262 return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
263 }
264 }
265
266
267 // Translate physical address to memory module/chip name.
268 // Assumes interleaving between two memory channels based on the XOR of
269 // all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
270 // blocks with bits distributed from each chip in that channel.
int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
  // Without a configured channel table the address can't be mapped.
  if (!channels_) {
    snprintf(buf, len, "DIMM Unknown");
    return -1;
  }

  // Find channel by XORing address bits in channel_hash mask.
  // The parity of the masked bits (split into two 32-bit halves) selects
  // one of the two channels.
  uint32 low = static_cast<uint32>(addr & channel_hash_);
  uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
  vector<string>& channel = (*channels_)[
      __builtin_parity(high) ^ __builtin_parity(low)];

  // Find dram chip by finding which byte within the channel
  // by address mod channel width, then divide the channel
  // evenly among the listed dram chips. Note, this will not work
  // with x4 dram.
  int chip = (addr % (channel_width_ / 8)) /
             ((channel_width_ / 8) / channel.size());
  string name = channel[chip];
  snprintf(buf, len, "%s", name.c_str());
  return 1;
}
293
294
295 // Classifies addresses according to "regions"
296 // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)297 int32 OsLayer::FindRegion(uint64 addr) {
298 static bool warned = false;
299
300 if (regionsize_ == 0) {
301 regionsize_ = totalmemsize_ / 8;
302 if (regionsize_ < 512 * kMegabyte)
303 regionsize_ = 512 * kMegabyte;
304 regioncount_ = totalmemsize_ / regionsize_;
305 if (regioncount_ < 1) regioncount_ = 1;
306 }
307
308 int32 region_num = addr / regionsize_;
309 if (region_num >= regioncount_) {
310 if (!warned) {
311 logprintf(0, "Log: region number %d exceeds region count %d\n",
312 region_num, regioncount_);
313 warned = true;
314 }
315 region_num = region_num % regioncount_;
316 }
317 return region_num;
318 }
319
320 // Report which cores are associated with a given region.
cpu_set_t *OsLayer::FindCoreMask(int32 region) {
  sat_assert(region >= 0);
  // Regions beyond the node count share node masks round-robin.
  region %= num_nodes_;
  // Build and cache the mask on first use. CPUs are assumed to be
  // numbered consecutively within each node.
  if (!cpu_sets_valid_[region]) {
    CPU_ZERO(&cpu_sets_[region]);
    for (int i = 0; i < num_cpus_per_node_; ++i) {
      CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
    }
    // Mark valid before logging: FindCoreMaskFormat() below calls back
    // into this function and must take the cached path.
    cpu_sets_valid_[region] = true;
    logprintf(5, "Log: Region %d mask 0x%s\n",
              region, FindCoreMaskFormat(region).c_str());
  }
  return &cpu_sets_[region];
}
335
336 // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)337 string OsLayer::FindCoreMaskFormat(int32 region) {
338 cpu_set_t* mask = FindCoreMask(region);
339 string format = cpuset_format(mask);
340 if (format.size() < 8)
341 format = string(8 - format.size(), '0') + format;
342 return format;
343 }
344
345 // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)346 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
347 time_t now = clock_->Now();
348 int ttf = now - time_initialized_;
349 if (strlen(symptom) && strlen(part)) {
350 logprintf(0, "Report Error: %s : %s : %d : %ds\n",
351 symptom, part, count, ttf);
352 } else {
353 // Log something so the error still shows up, but this won't break the
354 // parser.
355 logprintf(0, "Warning: Invalid Report Error: "
356 "%s : %s : %d : %ds\n", symptom, part, count, ttf);
357 }
358 return true;
359 }
360
361 // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()362 int64 OsLayer::FindHugePages() {
363 char buf[65] = "0";
364
365 // This is a kernel interface to query the numebr of hugepages
366 // available in the system.
367 static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
368 int hpfile = open(hugepages_info_file, O_RDONLY);
369
370 ssize_t bytes_read = read(hpfile, buf, 64);
371 close(hpfile);
372
373 if (bytes_read <= 0) {
374 logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
375 "read did not provide data\n");
376 return 0;
377 }
378
379 if (bytes_read == 64) {
380 logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
381 "is surprisingly large\n");
382 return 0;
383 }
384
385 // Add a null termintation to be string safe.
386 buf[bytes_read] = '\0';
387 // Read the page count.
388 int64 pages = strtoull(buf, NULL, 10); // NOLINT
389
390 return pages;
391 }
392
FindFreeMemSize()393 int64 OsLayer::FindFreeMemSize() {
394 int64 size = 0;
395 int64 minsize = 0;
396 if (totalmemsize_ > 0)
397 return totalmemsize_;
398
399 int64 pages = sysconf(_SC_PHYS_PAGES);
400 int64 avpages = sysconf(_SC_AVPHYS_PAGES);
401 int64 pagesize = sysconf(_SC_PAGESIZE);
402 int64 physsize = pages * pagesize;
403 int64 avphyssize = avpages * pagesize;
404
405 // Assume 2MB hugepages.
406 int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
407
408 if ((pages == -1) || (pagesize == -1)) {
409 logprintf(0, "Process Error: sysconf could not determine memory size.\n");
410 return 0;
411 }
412
413 // We want to leave enough stuff for things to run.
414 // If the user specified a minimum amount of memory to expect, require that.
415 // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
416 // If less than 2GB is present use 85% of what's available.
417 // These are fairly arbitrary numbers that seem to work OK.
418 //
419 // TODO(nsanders): is there a more correct way to determine target
420 // memory size?
421 if (hugepagesize > 0) {
422 if (min_hugepages_bytes_ > 0) {
423 minsize = min_hugepages_bytes_;
424 } else {
425 minsize = hugepagesize;
426 }
427 } else {
428 if (physsize < 2048LL * kMegabyte) {
429 minsize = ((pages * 85) / 100) * pagesize;
430 } else {
431 minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
432 }
433 // Make sure that at least reserve_mb_ is left for the system.
434 if (reserve_mb_ > 0) {
435 int64 totalsize = pages * pagesize;
436 int64 reserve_kb = reserve_mb_ * kMegabyte;
437 if (reserve_kb > totalsize) {
438 logprintf(0, "Procedural Error: %lld is bigger than the total memory "
439 "available %lld\n", reserve_kb, totalsize);
440 } else if (reserve_kb > totalsize - minsize) {
441 logprintf(5, "Warning: Overriding memory to use: original %lld, "
442 "current %lld\n", minsize, totalsize - reserve_kb);
443 minsize = totalsize - reserve_kb;
444 }
445 }
446 }
447
448 // Use hugepage sizing if available.
449 if (hugepagesize > 0) {
450 if (hugepagesize < minsize) {
451 logprintf(0, "Procedural Error: Not enough hugepages. "
452 "%lldMB available < %lldMB required.\n",
453 hugepagesize / kMegabyte,
454 minsize / kMegabyte);
455 // Require the calculated minimum amount of memory.
456 size = minsize;
457 } else {
458 // Require that we get all hugepages.
459 size = hugepagesize;
460 }
461 } else {
462 // Require the calculated minimum amount of memory.
463 size = minsize;
464 }
465
466 logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
467 "Targeting %lld MB (%lld%%)\n",
468 physsize / kMegabyte,
469 avphyssize / kMegabyte,
470 hugepagesize / kMegabyte,
471 size / kMegabyte,
472 size * 100 / physsize);
473
474 totalmemsize_ = size;
475 return size;
476 }
477
478 // Allocates all memory available.
AllocateAllMem()479 int64 OsLayer::AllocateAllMem() {
480 int64 length = FindFreeMemSize();
481 bool retval = AllocateTestMem(length, 0);
482 if (retval)
483 return length;
484 else
485 return 0;
486 }
487
488 // Allocate the target memory. This may be from malloc, hugepage pool
489 // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)490 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
491 // Try hugepages first.
492 void *buf = 0;
493
494 sat_assert(length >= 0);
495
496 if (paddr_base)
497 logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
498 " ignore.\n", paddr_base);
499
500 // Determine optimal memory allocation path.
501 bool prefer_hugepages = false;
502 bool prefer_posix_shm = false;
503 bool prefer_dynamic_mapping = false;
504
505 // Are there enough hugepages?
506 int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
507 // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
508 if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
509 prefer_dynamic_mapping = true;
510 prefer_posix_shm = true;
511 logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
512 logprintf(3, "Log: You may need to run "
513 "'sudo mount -o remount,size=100\% /dev/shm.'\n");
514 } else if (hugepagesize >= length) {
515 prefer_hugepages = true;
516 logprintf(3, "Log: Prefer using hugepage allocation.\n");
517 } else {
518 logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
519 }
520
521 #ifdef HAVE_SYS_SHM_H
522 // Allocate hugepage mapped memory.
523 if (prefer_hugepages) {
524 do { // Allow break statement.
525 int shmid;
526 void *shmaddr;
527
528 if ((shmid = shmget(2, length,
529 SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
530 int err = errno;
531 string errtxt = ErrorString(err);
532 logprintf(3, "Log: failed to allocate shared hugepage "
533 "object - err %d (%s)\n",
534 err, errtxt.c_str());
535 logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
536 break;
537 }
538
539 shmaddr = shmat(shmid, NULL, 0);
540 if (shmaddr == reinterpret_cast<void*>(-1)) {
541 int err = errno;
542 string errtxt = ErrorString(err);
543 logprintf(0, "Log: failed to attach shared "
544 "hugepage object - err %d (%s).\n",
545 err, errtxt.c_str());
546 if (shmctl(shmid, IPC_RMID, NULL) < 0) {
547 int err = errno;
548 string errtxt = ErrorString(err);
549 logprintf(0, "Log: failed to remove shared "
550 "hugepage object - err %d (%s).\n",
551 err, errtxt.c_str());
552 }
553 break;
554 }
555 use_hugepages_ = true;
556 shmid_ = shmid;
557 buf = shmaddr;
558 logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
559 shmid, shmaddr);
560 } while (0);
561 }
562
563 if ((!use_hugepages_) && prefer_posix_shm) {
564 do {
565 int shm_object;
566 void *shmaddr = NULL;
567
568 shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
569 if (shm_object < 0) {
570 int err = errno;
571 string errtxt = ErrorString(err);
572 logprintf(3, "Log: failed to allocate shared "
573 "smallpage object - err %d (%s)\n",
574 err, errtxt.c_str());
575 break;
576 }
577
578 if (0 > ftruncate(shm_object, length)) {
579 int err = errno;
580 string errtxt = ErrorString(err);
581 logprintf(3, "Log: failed to ftruncate shared "
582 "smallpage object - err %d (%s)\n",
583 err, errtxt.c_str());
584 break;
585 }
586
587 // 32 bit linux apps can only use ~1.4G of address space.
588 // Use dynamic mapping for allocations larger than that.
589 // Currently perf hit is ~10% for this.
590 if (prefer_dynamic_mapping) {
591 dynamic_mapped_shmem_ = true;
592 } else {
593 // Do a full mapping here otherwise.
594 shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
595 MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
596 shm_object, 0);
597 if (shmaddr == reinterpret_cast<void*>(-1)) {
598 int err = errno;
599 string errtxt = ErrorString(err);
600 logprintf(0, "Log: failed to map shared "
601 "smallpage object - err %d (%s).\n",
602 err, errtxt.c_str());
603 break;
604 }
605 }
606
607 use_posix_shm_ = true;
608 shmid_ = shm_object;
609 buf = shmaddr;
610 char location_message[256] = "";
611 if (dynamic_mapped_shmem_) {
612 sprintf(location_message, "mapped as needed");
613 } else {
614 sprintf(location_message, "at %p", shmaddr);
615 }
616 logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
617 shm_object, location_message);
618 } while (0);
619 shm_unlink("/stressapptest");
620 }
621 #endif // HAVE_SYS_SHM_H
622
623 if (!use_hugepages_ && !use_posix_shm_) {
624 // If the page size is what SAT is expecting explicitly perform mmap()
625 // allocation.
626 if (sysconf(_SC_PAGESIZE) >= 4096) {
627 void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
628 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
629 if (map_buf != MAP_FAILED) {
630 buf = map_buf;
631 mmapped_allocation_ = true;
632 logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
633 }
634 }
635 if (!mmapped_allocation_) {
636 // Use memalign to ensure that blocks are aligned enough for disk direct
637 // IO.
638 buf = static_cast<char*>(memalign(4096, length));
639 if (buf) {
640 logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
641 } else {
642 logprintf(0, "Process Error: memalign returned 0\n");
643 if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
644 logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
645 "bit process. Please setup shared memory.\n");
646 }
647 }
648 }
649 }
650
651 testmem_ = buf;
652 if (buf || dynamic_mapped_shmem_) {
653 testmemsize_ = length;
654 } else {
655 testmemsize_ = 0;
656 }
657
658 return (buf != 0) || dynamic_mapped_shmem_;
659 }
660
661 // Free the test memory.
void OsLayer::FreeTestMem() {
  if (testmem_) {
    if (use_hugepages_) {
#ifdef HAVE_SYS_SHM_H
      // Detach and destroy the SysV hugepage segment.
      shmdt(testmem_);
      shmctl(shmid_, IPC_RMID, NULL);
#endif
    } else if (use_posix_shm_) {
      // Dynamically mapped windows are released piecemeal through
      // ReleaseTestMem(); only a full mapping is unmapped here.
      if (!dynamic_mapped_shmem_) {
        munmap(testmem_, testmemsize_);
      }
      // shmid_ holds the shm_open() descriptor in the POSIX shm case.
      close(shmid_);
    } else if (mmapped_allocation_) {
      munmap(testmem_, testmemsize_);
    } else {
      // Plain memalign() allocation.
      free(testmem_);
    }
    testmem_ = 0;
    testmemsize_ = 0;
  }
}
683
684
685 // Prepare the target memory. It may requre mapping in, or this may be a noop.
void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
  sat_assert((offset + length) <= testmemsize_);
  // Dynamically mapped shmem maps a fresh window for each request;
  // the caller must pair this with ReleaseTestMem().
  if (dynamic_mapped_shmem_) {
    // TODO(nsanders): Check if we can support MAP_NONBLOCK,
    // and evaluate performance hit from not using it.
#ifdef HAVE_MMAP64
    void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
                     shmid_, offset);
#else
    void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
                     shmid_, offset);
#endif
    if (mapping == MAP_FAILED) {
      string errtxt = ErrorString(errno);
      logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
                "error: %s.\n",
                offset, length, errtxt.c_str());
      // A failed window mapping is unrecoverable for the test.
      sat_assert(0);
    }
    return mapping;
  }

  // Fully resident allocations only need pointer arithmetic.
  return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
}
712
713 // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)714 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
715 if (dynamic_mapped_shmem_) {
716 int retval = munmap(addr, length);
717 if (retval == -1) {
718 string errtxt = ErrorString(errno);
719 logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
720 "error: %s.\n",
721 addr, length, errtxt.c_str());
722 sat_assert(0);
723 }
724 }
725 }
726
727 // No error polling on unknown systems.
ErrorPoll()728 int OsLayer::ErrorPoll() {
729 return 0;
730 }
731
732 // Generally, poll for errors once per second.
ErrorWait()733 void OsLayer::ErrorWait() {
734 sat_sleep(1);
735 return;
736 }
737
738 // Open a PCI bus-dev-func as a file and return its file descriptor.
739 // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)740 int OsLayer::PciOpen(int bus, int device, int function) {
741 char dev_file[256];
742
743 snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
744 bus, device, function);
745
746 int fd = open(dev_file, O_RDWR);
747 if (fd == -1) {
748 logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
749 "function %d (errno %d).\n",
750 bus, device, function, errno);
751 return -1;
752 }
753
754 return fd;
755 }
756
757
758 // Read and write functions to access PCI config.
uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
  // Strict aliasing rules lawyers will cause data corruption
  // on cast pointers in some gccs.
  union {
    uint32 l32;
    uint16 l16;
    uint8 l8;
  } datacast;
  datacast.l32 = 0;
  uint32 size = width / 8;

  sat_assert((width == 32) || (width == 16) || (width == 8));
  // PCI config space is 256 bytes; the access must fit inside it.
  sat_assert(offset <= (256 - size));

  if (lseek(fd, offset, SEEK_SET) < 0) {
    logprintf(0, "Process Error: Can't seek %x\n", offset);
    return 0;
  }
  if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    logprintf(0, "Process Error: Can't read %x\n", offset);
    return 0;
  }

  // Extract the data.
  switch (width) {
    case 8:
      // Sanity-check that the union members alias the same address.
      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
      return datacast.l8;
    case 16:
      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
      return datacast.l16;
    case 32:
      return datacast.l32;
  }
  return 0;
}
795
PciWrite(int fd,uint32 offset,uint32 value,int width)796 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
797 // Strict aliasing rules lawyers will cause data corruption
798 // on cast pointers in some gccs.
799 union {
800 uint32 l32;
801 uint16 l16;
802 uint8 l8;
803 } datacast;
804 datacast.l32 = 0;
805 uint32 size = width / 8;
806
807 sat_assert((width == 32) || (width == 16) || (width == 8));
808 sat_assert(offset <= (256 - size));
809
810 // Cram the data into the right alignment.
811 switch (width) {
812 case 8:
813 sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
814 datacast.l8 = value;
815 case 16:
816 sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
817 datacast.l16 = value;
818 case 32:
819 datacast.l32 = value;
820 }
821
822 if (lseek(fd, offset, SEEK_SET) < 0) {
823 logprintf(0, "Process Error: Can't seek %x\n", offset);
824 return;
825 }
826 if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
827 logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
828 return;
829 }
830
831 return;
832 }
833
834
835
836 // Open dev msr.
OpenMSR(uint32 core,uint32 address)837 int OsLayer::OpenMSR(uint32 core, uint32 address) {
838 char buf[256];
839 snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
840 int fd = open(buf, O_RDWR);
841 if (fd < 0)
842 return fd;
843
844 uint32 pos = lseek(fd, address, SEEK_SET);
845 if (pos != address) {
846 close(fd);
847 logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
848 return -1;
849 }
850
851 return fd;
852 }
853
ReadMSR(uint32 core,uint32 address,uint64 * data)854 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
855 int fd = OpenMSR(core, address);
856 if (fd < 0)
857 return false;
858
859 // Read from the msr.
860 bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
861
862 if (!res)
863 logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
864
865 close(fd);
866
867 return res;
868 }
869
WriteMSR(uint32 core,uint32 address,uint64 * data)870 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
871 int fd = OpenMSR(core, address);
872 if (fd < 0)
873 return false;
874
875 // Write to the msr
876 bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
877
878 if (!res)
879 logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
880
881 close(fd);
882
883 return res;
884 }
885
886 // Extract bits [n+len-1, n] from a 32 bit word.
887 // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)888 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
889 return (val >> n) & ((1<<len) - 1);
890 }
891
892 // Generic CPU stress workload that would work on any CPU/Platform.
893 // Float-point array moving average calculation.
CpuStressWorkload()894 bool OsLayer::CpuStressWorkload() {
895 double float_arr[100];
896 double sum = 0;
897 #ifdef HAVE_RAND_R
898 unsigned int seed = 12345;
899 #endif
900
901 // Initialize array with random numbers.
902 for (int i = 0; i < 100; i++) {
903 #ifdef HAVE_RAND_R
904 float_arr[i] = rand_r(&seed);
905 if (rand_r(&seed) % 2)
906 float_arr[i] *= -1.0;
907 #else
908 srand(time(NULL));
909 float_arr[i] = rand(); // NOLINT
910 if (rand() % 2) // NOLINT
911 float_arr[i] *= -1.0;
912 #endif
913 }
914
915 // Calculate moving average.
916 for (int i = 0; i < 100000000; i++) {
917 float_arr[i % 100] =
918 (float_arr[i % 100] + float_arr[(i + 1) % 100] +
919 float_arr[(i + 99) % 100]) / 3;
920 sum += float_arr[i % 100];
921 }
922
923 // Artificial printf so the loops do not get optimized away.
924 if (sum == 0.0)
925 logprintf(12, "Log: I'm Feeling Lucky!\n");
926 return true;
927 }
928