1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7
8 // http://www.apache.org/licenses/LICENSE-2.0
9
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 // os.cc : os and machine specific implementation
17 // This file includes an abstracted interface
18 // for linux-distro specific and HW specific
19 // interfaces.
20
21 #include "os.h"
22
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <linux/types.h>
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/ioctl.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/ipc.h>
35 #ifdef HAVE_SYS_SHM_H
36 #include <sys/shm.h>
37 #endif
38 #include <unistd.h>
39
40 #ifndef SHM_HUGETLB
41 #define SHM_HUGETLB 04000 // remove when glibc defines it
42 #endif
43
44 #include <string>
45 #include <list>
46
47 // This file must work with autoconf on its public version,
48 // so these includes are correct.
49 #include "sattypes.h"
50 #include "error_diag.h"
51 #include "clock.h"
52
53 // OsLayer initialization.
OsLayer()54 OsLayer::OsLayer() {
55 testmem_ = 0;
56 testmemsize_ = 0;
57 totalmemsize_ = 0;
58 min_hugepages_bytes_ = 0;
59 reserve_mb_ = 0;
60 normal_mem_ = true;
61 use_hugepages_ = false;
62 use_posix_shm_ = false;
63 dynamic_mapped_shmem_ = false;
64 mmapped_allocation_ = false;
65 shmid_ = 0;
66 channels_ = NULL;
67
68 time_initialized_ = 0;
69
70 regionsize_ = 0;
71 regioncount_ = 1;
72 num_cpus_ = 0;
73 num_nodes_ = 0;
74 num_cpus_per_node_ = 0;
75 error_diagnoser_ = 0;
76 err_log_callback_ = 0;
77 error_injection_ = false;
78
79 void *pvoid = 0;
80 address_mode_ = sizeof(pvoid) * 8;
81
82 has_clflush_ = false;
83 has_vector_ = false;
84
85 use_flush_page_cache_ = false;
86
87 clock_ = NULL;
88 }
89
90 // OsLayer cleanup.
~OsLayer()91 OsLayer::~OsLayer() {
92 if (error_diagnoser_)
93 delete error_diagnoser_;
94 if (clock_)
95 delete clock_;
96 }
97
98 // OsLayer initialization.
Initialize()99 bool OsLayer::Initialize() {
100 if (!clock_) {
101 clock_ = new Clock();
102 }
103
104 time_initialized_ = clock_->Now();
105 // Detect asm support.
106 GetFeatures();
107
108 if (num_cpus_ == 0) {
109 num_nodes_ = 1;
110 num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
111 num_cpus_per_node_ = num_cpus_ / num_nodes_;
112 }
113 logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
114 sat_assert(CPU_SETSIZE >= num_cpus_);
115 cpu_sets_.resize(num_nodes_);
116 cpu_sets_valid_.resize(num_nodes_);
117 // Create error diagnoser.
118 error_diagnoser_ = new ErrorDiag();
119 if (!error_diagnoser_->set_os(this))
120 return false;
121 return true;
122 }
123
124 // Machine type detected. Can we implement all these functions correctly?
IsSupported()125 bool OsLayer::IsSupported() {
126 if (kOpenSource) {
127 // There are no explicitly supported systems in open source version.
128 return true;
129 }
130
131 // This is the default empty implementation.
132 // SAT won't report full error information.
133 return false;
134 }
135
AddressMode()136 int OsLayer::AddressMode() {
137 // Detect 32/64 bit binary.
138 void *pvoid = 0;
139 return sizeof(pvoid) * 8;
140 }
141
142 // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)143 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
144 uint64 frame, paddr, pfnmask, pagemask;
145 int pagesize = sysconf(_SC_PAGESIZE);
146 off64_t off = ((uintptr_t)vaddr) / pagesize * 8;
147 int fd = open(kPagemapPath, O_RDONLY);
148
149 /*
150 * https://www.kernel.org/doc/Documentation/vm/pagemap.txt
151 * API change (July 2015)
152 * https://patchwork.kernel.org/patch/6787991/
153 */
154
155 if (fd < 0)
156 return 0;
157
158 if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
159 int err = errno;
160 string errtxt = ErrorString(err);
161 logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
162 kPagemapPath, err, errtxt.c_str());
163 if (fd >= 0)
164 close(fd);
165 return 0;
166 }
167 close(fd);
168
169 /* Check if page is present and not swapped. */
170 if (!(frame & (1ULL << 63)) || (frame & (1ULL << 62)))
171 return 0;
172
173 /* pfn is bits 0-54. */
174 pfnmask = ((1ULL << 55) - 1);
175 /* Pagesize had better be a power of 2. */
176 pagemask = pagesize - 1;
177
178 paddr = ((frame & pfnmask) * pagesize) | ((uintptr_t)vaddr & pagemask);
179 return paddr;
180 }
181
182 // Returns the HD device that contains this file.
FindFileDevice(string filename)183 string OsLayer::FindFileDevice(string filename) {
184 return "hdUnknown";
185 }
186
187 // Returns a list of locations corresponding to HD devices.
FindFileDevices()188 list<string> OsLayer::FindFileDevices() {
189 // No autodetection on unknown systems.
190 list<string> locations;
191 return locations;
192 }
193
194
195 // Get HW core features from cpuid instruction.
GetFeatures()196 void OsLayer::GetFeatures() {
197 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
198 unsigned int eax = 1, ebx, ecx, edx;
199 cpuid(&eax, &ebx, &ecx, &edx);
200 has_clflush_ = (edx >> 19) & 1;
201 has_vector_ = (edx >> 26) & 1; // SSE2 caps bit.
202
203 logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
204 has_clflush_ ? "true" : "false",
205 has_vector_ ? "true" : "false");
206 #elif defined(STRESSAPPTEST_CPU_PPC)
207 // All PPC implementations have cache flush instructions.
208 has_clflush_ = true;
209 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
210 // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
211 // For now assume neon and don't run -W if you don't have it.
212 has_vector_ = true; // NEON.
213 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
214 #else
215 #warning "Unsupported CPU type: unable to determine feature set."
216 #endif
217 }
218
219
220 // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)221 void OsLayer::ActivateFlushPageCache(void) {
222 logprintf(9, "Log: page cache will be flushed as needed\n");
223 use_flush_page_cache_ = true;
224 }
225
226 // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)227 bool OsLayer::FlushPageCache(void) {
228 if (!use_flush_page_cache_)
229 return true;
230
231 // First, ask the kernel to write the cache to the disk.
232 sync();
233
234 // Second, ask the kernel to empty the cache by writing "1" to
235 // "/proc/sys/vm/drop_caches".
236 static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
237 int dcfile = open(drop_caches_file, O_WRONLY);
238 if (dcfile < 0) {
239 int err = errno;
240 string errtxt = ErrorString(err);
241 logprintf(3, "Log: failed to open %s - err %d (%s)\n",
242 drop_caches_file, err, errtxt.c_str());
243 return false;
244 }
245
246 ssize_t bytes_written = write(dcfile, "1", 1);
247 close(dcfile);
248
249 if (bytes_written != 1) {
250 int err = errno;
251 string errtxt = ErrorString(err);
252 logprintf(3, "Log: failed to write %s - err %d (%s)\n",
253 drop_caches_file, err, errtxt.c_str());
254 return false;
255 }
256 return true;
257 }
258
259
260 // We need to flush the cacheline here.
Flush(void * vaddr)261 void OsLayer::Flush(void *vaddr) {
262 // Use the generic flush. This function is just so we can override
263 // this if we are so inclined.
264 if (has_clflush_) {
265 OsLayer::FastFlush(vaddr);
266 }
267 }
268
269
270 // Run C or ASM copy as appropriate..
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)271 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
272 unsigned int size_in_bytes,
273 AdlerChecksum *checksum) {
274 if (has_vector_) {
275 return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
276 } else {
277 return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
278 }
279 }
280
281
282 // Translate physical address to memory module/chip name.
283 // Assumes interleaving between two memory channels based on the XOR of
284 // all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
285 // blocks with bits distributed from each chip in that channel.
FindDimm(uint64 addr,char * buf,int len)286 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
287 if (!channels_) {
288 snprintf(buf, len, "DIMM Unknown");
289 return -1;
290 }
291
292 // Find channel by XORing address bits in channel_hash mask.
293 uint32 low = static_cast<uint32>(addr & channel_hash_);
294 uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
295 vector<string>& channel = (*channels_)[
296 __builtin_parity(high) ^ __builtin_parity(low)];
297
298 // Find dram chip by finding which byte within the channel
299 // by address mod channel width, then divide the channel
300 // evenly among the listed dram chips. Note, this will not work
301 // with x4 dram.
302 int chip = (addr % (channel_width_ / 8)) /
303 ((channel_width_ / 8) / channel.size());
304 string name = channel[chip];
305 snprintf(buf, len, "%s", name.c_str());
306 return 1;
307 }
308
309
310 // Classifies addresses according to "regions"
311 // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)312 int32 OsLayer::FindRegion(uint64 addr) {
313 static bool warned = false;
314
315 if (regionsize_ == 0) {
316 regionsize_ = totalmemsize_ / 8;
317 if (regionsize_ < 512 * kMegabyte)
318 regionsize_ = 512 * kMegabyte;
319 regioncount_ = totalmemsize_ / regionsize_;
320 if (regioncount_ < 1) regioncount_ = 1;
321 }
322
323 int32 region_num = addr / regionsize_;
324 if (region_num >= regioncount_) {
325 if (!warned) {
326 logprintf(0, "Log: region number %d exceeds region count %d\n",
327 region_num, regioncount_);
328 warned = true;
329 }
330 region_num = region_num % regioncount_;
331 }
332 return region_num;
333 }
334
335 // Report which cores are associated with a given region.
FindCoreMask(int32 region)336 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
337 sat_assert(region >= 0);
338 region %= num_nodes_;
339 if (!cpu_sets_valid_[region]) {
340 CPU_ZERO(&cpu_sets_[region]);
341 for (int i = 0; i < num_cpus_per_node_; ++i) {
342 CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
343 }
344 cpu_sets_valid_[region] = true;
345 logprintf(5, "Log: Region %d mask 0x%s\n",
346 region, FindCoreMaskFormat(region).c_str());
347 }
348 return &cpu_sets_[region];
349 }
350
351 // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)352 string OsLayer::FindCoreMaskFormat(int32 region) {
353 cpu_set_t* mask = FindCoreMask(region);
354 string format = cpuset_format(mask);
355 if (format.size() < 8)
356 format = string(8 - format.size(), '0') + format;
357 return format;
358 }
359
360 // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)361 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
362 time_t now = clock_->Now();
363 int ttf = now - time_initialized_;
364 if (strlen(symptom) && strlen(part)) {
365 logprintf(0, "Report Error: %s : %s : %d : %ds\n",
366 symptom, part, count, ttf);
367 } else {
368 // Log something so the error still shows up, but this won't break the
369 // parser.
370 logprintf(0, "Warning: Invalid Report Error: "
371 "%s : %s : %d : %ds\n", symptom, part, count, ttf);
372 }
373 return true;
374 }
375
376 // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()377 int64 OsLayer::FindHugePages() {
378 char buf[65] = "0";
379
380 // This is a kernel interface to query the numebr of hugepages
381 // available in the system.
382 static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
383 int hpfile = open(hugepages_info_file, O_RDONLY);
384
385 ssize_t bytes_read = read(hpfile, buf, 64);
386 close(hpfile);
387
388 if (bytes_read <= 0) {
389 logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
390 "read did not provide data\n");
391 return 0;
392 }
393
394 if (bytes_read == 64) {
395 logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
396 "is surprisingly large\n");
397 return 0;
398 }
399
400 // Add a null termintation to be string safe.
401 buf[bytes_read] = '\0';
402 // Read the page count.
403 int64 pages = strtoull(buf, NULL, 10); // NOLINT
404
405 return pages;
406 }
407
FindFreeMemSize()408 int64 OsLayer::FindFreeMemSize() {
409 int64 size = 0;
410 int64 minsize = 0;
411 if (totalmemsize_ > 0)
412 return totalmemsize_;
413
414 int64 pages = sysconf(_SC_PHYS_PAGES);
415 int64 avpages = sysconf(_SC_AVPHYS_PAGES);
416 int64 pagesize = sysconf(_SC_PAGESIZE);
417 int64 physsize = pages * pagesize;
418 int64 avphyssize = avpages * pagesize;
419
420 // Assume 2MB hugepages.
421 int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
422
423 if ((pages == -1) || (pagesize == -1)) {
424 logprintf(0, "Process Error: sysconf could not determine memory size.\n");
425 return 0;
426 }
427
428 // We want to leave enough stuff for things to run.
429 // If the user specified a minimum amount of memory to expect, require that.
430 // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
431 // If less than 2GB is present use 85% of what's available.
432 // These are fairly arbitrary numbers that seem to work OK.
433 //
434 // TODO(nsanders): is there a more correct way to determine target
435 // memory size?
436 if (hugepagesize > 0) {
437 if (min_hugepages_bytes_ > 0) {
438 minsize = min_hugepages_bytes_;
439 } else {
440 minsize = hugepagesize;
441 }
442 } else {
443 if (physsize < 2048LL * kMegabyte) {
444 minsize = ((pages * 85) / 100) * pagesize;
445 } else {
446 minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
447 }
448 // Make sure that at least reserve_mb_ is left for the system.
449 if (reserve_mb_ > 0) {
450 int64 totalsize = pages * pagesize;
451 int64 reserve_kb = reserve_mb_ * kMegabyte;
452 if (reserve_kb > totalsize) {
453 logprintf(0, "Procedural Error: %lld is bigger than the total memory "
454 "available %lld\n", reserve_kb, totalsize);
455 } else if (reserve_kb > totalsize - minsize) {
456 logprintf(5, "Warning: Overriding memory to use: original %lld, "
457 "current %lld\n", minsize, totalsize - reserve_kb);
458 minsize = totalsize - reserve_kb;
459 }
460 }
461 }
462
463 // Use hugepage sizing if available.
464 if (hugepagesize > 0) {
465 if (hugepagesize < minsize) {
466 logprintf(0, "Procedural Error: Not enough hugepages. "
467 "%lldMB available < %lldMB required.\n",
468 hugepagesize / kMegabyte,
469 minsize / kMegabyte);
470 // Require the calculated minimum amount of memory.
471 size = minsize;
472 } else {
473 // Require that we get all hugepages.
474 size = hugepagesize;
475 }
476 } else {
477 // Require the calculated minimum amount of memory.
478 size = minsize;
479 }
480
481 logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
482 "Targeting %lld MB (%lld%%)\n",
483 physsize / kMegabyte,
484 avphyssize / kMegabyte,
485 hugepagesize / kMegabyte,
486 size / kMegabyte,
487 size * 100 / physsize);
488
489 totalmemsize_ = size;
490 return size;
491 }
492
493 // Allocates all memory available.
AllocateAllMem()494 int64 OsLayer::AllocateAllMem() {
495 int64 length = FindFreeMemSize();
496 bool retval = AllocateTestMem(length, 0);
497 if (retval)
498 return length;
499 else
500 return 0;
501 }
502
503 // Allocate the target memory. This may be from malloc, hugepage pool
504 // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)505 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
506 // Try hugepages first.
507 void *buf = 0;
508
509 sat_assert(length >= 0);
510
511 if (paddr_base)
512 logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
513 " ignore.\n", paddr_base);
514
515 // Determine optimal memory allocation path.
516 bool prefer_hugepages = false;
517 bool prefer_posix_shm = false;
518 bool prefer_dynamic_mapping = false;
519
520 // Are there enough hugepages?
521 int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
522 // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
523 if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
524 prefer_dynamic_mapping = true;
525 prefer_posix_shm = true;
526 logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
527 logprintf(3, "Log: You may need to run "
528 "'sudo mount -o remount,size=100\% /dev/shm.'\n");
529 } else if (hugepagesize >= length) {
530 prefer_hugepages = true;
531 logprintf(3, "Log: Prefer using hugepage allocation.\n");
532 } else {
533 logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
534 }
535
536 #ifdef HAVE_SYS_SHM_H
537 // Allocate hugepage mapped memory.
538 if (prefer_hugepages) {
539 do { // Allow break statement.
540 int shmid;
541 void *shmaddr;
542
543 if ((shmid = shmget(2, length,
544 SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
545 int err = errno;
546 string errtxt = ErrorString(err);
547 logprintf(3, "Log: failed to allocate shared hugepage "
548 "object - err %d (%s)\n",
549 err, errtxt.c_str());
550 logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
551 break;
552 }
553
554 shmaddr = shmat(shmid, NULL, 0);
555 if (shmaddr == reinterpret_cast<void*>(-1)) {
556 int err = errno;
557 string errtxt = ErrorString(err);
558 logprintf(0, "Log: failed to attach shared "
559 "hugepage object - err %d (%s).\n",
560 err, errtxt.c_str());
561 if (shmctl(shmid, IPC_RMID, NULL) < 0) {
562 int err = errno;
563 string errtxt = ErrorString(err);
564 logprintf(0, "Log: failed to remove shared "
565 "hugepage object - err %d (%s).\n",
566 err, errtxt.c_str());
567 }
568 break;
569 }
570 use_hugepages_ = true;
571 shmid_ = shmid;
572 buf = shmaddr;
573 logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
574 shmid, shmaddr);
575 } while (0);
576 }
577
578 if ((!use_hugepages_) && prefer_posix_shm) {
579 do {
580 int shm_object;
581 void *shmaddr = NULL;
582
583 shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
584 if (shm_object < 0) {
585 int err = errno;
586 string errtxt = ErrorString(err);
587 logprintf(3, "Log: failed to allocate shared "
588 "smallpage object - err %d (%s)\n",
589 err, errtxt.c_str());
590 break;
591 }
592
593 if (0 > ftruncate(shm_object, length)) {
594 int err = errno;
595 string errtxt = ErrorString(err);
596 logprintf(3, "Log: failed to ftruncate shared "
597 "smallpage object - err %d (%s)\n",
598 err, errtxt.c_str());
599 break;
600 }
601
602 // 32 bit linux apps can only use ~1.4G of address space.
603 // Use dynamic mapping for allocations larger than that.
604 // Currently perf hit is ~10% for this.
605 if (prefer_dynamic_mapping) {
606 dynamic_mapped_shmem_ = true;
607 } else {
608 // Do a full mapping here otherwise.
609 shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
610 MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
611 shm_object, 0);
612 if (shmaddr == reinterpret_cast<void*>(-1)) {
613 int err = errno;
614 string errtxt = ErrorString(err);
615 logprintf(0, "Log: failed to map shared "
616 "smallpage object - err %d (%s).\n",
617 err, errtxt.c_str());
618 break;
619 }
620 }
621
622 use_posix_shm_ = true;
623 shmid_ = shm_object;
624 buf = shmaddr;
625 char location_message[256] = "";
626 if (dynamic_mapped_shmem_) {
627 sprintf(location_message, "mapped as needed");
628 } else {
629 sprintf(location_message, "at %p", shmaddr);
630 }
631 logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
632 shm_object, location_message);
633 } while (0);
634 shm_unlink("/stressapptest");
635 }
636 #endif // HAVE_SYS_SHM_H
637
638 if (!use_hugepages_ && !use_posix_shm_) {
639 // If the page size is what SAT is expecting explicitly perform mmap()
640 // allocation.
641 if (sysconf(_SC_PAGESIZE) >= 4096) {
642 void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
643 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
644 if (map_buf != MAP_FAILED) {
645 buf = map_buf;
646 mmapped_allocation_ = true;
647 logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
648 }
649 }
650 if (!mmapped_allocation_) {
651 // Use memalign to ensure that blocks are aligned enough for disk direct
652 // IO.
653 buf = static_cast<char*>(memalign(4096, length));
654 if (buf) {
655 logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
656 } else {
657 logprintf(0, "Process Error: memalign returned 0\n");
658 if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
659 logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
660 "bit process. Please setup shared memory.\n");
661 }
662 }
663 }
664 }
665
666 testmem_ = buf;
667 if (buf || dynamic_mapped_shmem_) {
668 testmemsize_ = length;
669 } else {
670 testmemsize_ = 0;
671 }
672
673 return (buf != 0) || dynamic_mapped_shmem_;
674 }
675
676 // Free the test memory.
FreeTestMem()677 void OsLayer::FreeTestMem() {
678 if (testmem_) {
679 if (use_hugepages_) {
680 #ifdef HAVE_SYS_SHM_H
681 shmdt(testmem_);
682 shmctl(shmid_, IPC_RMID, NULL);
683 #endif
684 } else if (use_posix_shm_) {
685 if (!dynamic_mapped_shmem_) {
686 munmap(testmem_, testmemsize_);
687 }
688 close(shmid_);
689 } else if (mmapped_allocation_) {
690 munmap(testmem_, testmemsize_);
691 } else {
692 free(testmem_);
693 }
694 testmem_ = 0;
695 testmemsize_ = 0;
696 }
697 }
698
699
700 // Prepare the target memory. It may requre mapping in, or this may be a noop.
PrepareTestMem(uint64 offset,uint64 length)701 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
702 sat_assert((offset + length) <= testmemsize_);
703 if (dynamic_mapped_shmem_) {
704 // TODO(nsanders): Check if we can support MAP_NONBLOCK,
705 // and evaluate performance hit from not using it.
706 #ifdef HAVE_MMAP64
707 void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
708 MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
709 shmid_, offset);
710 #else
711 void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
712 MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
713 shmid_, offset);
714 #endif
715 if (mapping == MAP_FAILED) {
716 string errtxt = ErrorString(errno);
717 logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
718 "error: %s.\n",
719 offset, length, errtxt.c_str());
720 sat_assert(0);
721 }
722 return mapping;
723 }
724
725 return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
726 }
727
728 // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)729 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
730 if (dynamic_mapped_shmem_) {
731 int retval = munmap(addr, length);
732 if (retval == -1) {
733 string errtxt = ErrorString(errno);
734 logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
735 "error: %s.\n",
736 addr, length, errtxt.c_str());
737 sat_assert(0);
738 }
739 }
740 }
741
742 // No error polling on unknown systems.
ErrorPoll()743 int OsLayer::ErrorPoll() {
744 return 0;
745 }
746
747 // Generally, poll for errors once per second.
ErrorWait()748 void OsLayer::ErrorWait() {
749 sat_sleep(1);
750 return;
751 }
752
753 // Open a PCI bus-dev-func as a file and return its file descriptor.
754 // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)755 int OsLayer::PciOpen(int bus, int device, int function) {
756 char dev_file[256];
757
758 snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
759 bus, device, function);
760
761 int fd = open(dev_file, O_RDWR);
762 if (fd == -1) {
763 logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
764 "function %d (errno %d).\n",
765 bus, device, function, errno);
766 return -1;
767 }
768
769 return fd;
770 }
771
772
773 // Read and write functions to access PCI config.
PciRead(int fd,uint32 offset,int width)774 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
775 // Strict aliasing rules lawyers will cause data corruption
776 // on cast pointers in some gccs.
777 union {
778 uint32 l32;
779 uint16 l16;
780 uint8 l8;
781 } datacast;
782 datacast.l32 = 0;
783 uint32 size = width / 8;
784
785 sat_assert((width == 32) || (width == 16) || (width == 8));
786 sat_assert(offset <= (256 - size));
787
788 if (lseek(fd, offset, SEEK_SET) < 0) {
789 logprintf(0, "Process Error: Can't seek %x\n", offset);
790 return 0;
791 }
792 if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
793 logprintf(0, "Process Error: Can't read %x\n", offset);
794 return 0;
795 }
796
797 // Extract the data.
798 switch (width) {
799 case 8:
800 sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
801 return datacast.l8;
802 case 16:
803 sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
804 return datacast.l16;
805 case 32:
806 return datacast.l32;
807 }
808 return 0;
809 }
810
PciWrite(int fd,uint32 offset,uint32 value,int width)811 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
812 // Strict aliasing rules lawyers will cause data corruption
813 // on cast pointers in some gccs.
814 union {
815 uint32 l32;
816 uint16 l16;
817 uint8 l8;
818 } datacast;
819 datacast.l32 = 0;
820 uint32 size = width / 8;
821
822 sat_assert((width == 32) || (width == 16) || (width == 8));
823 sat_assert(offset <= (256 - size));
824
825 // Cram the data into the right alignment.
826 switch (width) {
827 case 8:
828 sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
829 datacast.l8 = value;
830 case 16:
831 sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
832 datacast.l16 = value;
833 case 32:
834 datacast.l32 = value;
835 }
836
837 if (lseek(fd, offset, SEEK_SET) < 0) {
838 logprintf(0, "Process Error: Can't seek %x\n", offset);
839 return;
840 }
841 if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
842 logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
843 return;
844 }
845
846 return;
847 }
848
849
850
851 // Open dev msr.
OpenMSR(uint32 core,uint32 address)852 int OsLayer::OpenMSR(uint32 core, uint32 address) {
853 char buf[256];
854 snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
855 int fd = open(buf, O_RDWR);
856 if (fd < 0)
857 return fd;
858
859 uint32 pos = lseek(fd, address, SEEK_SET);
860 if (pos != address) {
861 close(fd);
862 logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
863 return -1;
864 }
865
866 return fd;
867 }
868
ReadMSR(uint32 core,uint32 address,uint64 * data)869 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
870 int fd = OpenMSR(core, address);
871 if (fd < 0)
872 return false;
873
874 // Read from the msr.
875 bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
876
877 if (!res)
878 logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
879
880 close(fd);
881
882 return res;
883 }
884
WriteMSR(uint32 core,uint32 address,uint64 * data)885 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
886 int fd = OpenMSR(core, address);
887 if (fd < 0)
888 return false;
889
890 // Write to the msr
891 bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
892
893 if (!res)
894 logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
895
896 close(fd);
897
898 return res;
899 }
900
901 // Extract bits [n+len-1, n] from a 32 bit word.
902 // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)903 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
904 return (val >> n) & ((1<<len) - 1);
905 }
906
907 // Generic CPU stress workload that would work on any CPU/Platform.
908 // Float-point array moving average calculation.
CpuStressWorkload()909 bool OsLayer::CpuStressWorkload() {
910 double float_arr[100];
911 double sum = 0;
912 #ifdef HAVE_RAND_R
913 unsigned int seed = 12345;
914 #endif
915
916 // Initialize array with random numbers.
917 for (int i = 0; i < 100; i++) {
918 #ifdef HAVE_RAND_R
919 float_arr[i] = rand_r(&seed);
920 if (rand_r(&seed) % 2)
921 float_arr[i] *= -1.0;
922 #else
923 srand(time(NULL));
924 float_arr[i] = rand(); // NOLINT
925 if (rand() % 2) // NOLINT
926 float_arr[i] *= -1.0;
927 #endif
928 }
929
930 // Calculate moving average.
931 for (int i = 0; i < 100000000; i++) {
932 float_arr[i % 100] =
933 (float_arr[i % 100] + float_arr[(i + 1) % 100] +
934 float_arr[(i + 99) % 100]) / 3;
935 sum += float_arr[i % 100];
936 }
937
938 // Artificial printf so the loops do not get optimized away.
939 if (sum == 0.0)
940 logprintf(12, "Log: I'm Feeling Lucky!\n");
941 return true;
942 }
943