• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 Google Inc. All Rights Reserved.
2 // Author: nsanders, menderico
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // os.cc : os and machine specific implementation
17 // This file includes an abstracted interface
18 // for linux-distro specific and HW specific
19 // interfaces.
20 
21 #include "os.h"
22 
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <linux/types.h>
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <sys/ioctl.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/ipc.h>
35 #ifdef HAVE_SYS_SHM_H
36 #include <sys/shm.h>
37 #endif
38 #include <unistd.h>
39 
40 #ifndef SHM_HUGETLB
41 #define SHM_HUGETLB      04000  // remove when glibc defines it
42 #endif
43 
44 #include <string>
45 #include <list>
46 
47 // This file must work with autoconf on its public version,
48 // so these includes are correct.
49 #include "sattypes.h"
50 #include "error_diag.h"
51 
52 // OsLayer initialization.
OsLayer()53 OsLayer::OsLayer() {
54   testmem_ = 0;
55   testmemsize_ = 0;
56   totalmemsize_ = 0;
57   min_hugepages_bytes_ = 0;
58   normal_mem_ = true;
59   use_hugepages_ = false;
60   use_posix_shm_ = false;
61   dynamic_mapped_shmem_ = false;
62   shmid_ = 0;
63 
64   time_initialized_ = 0;
65 
66   regionsize_ = 0;
67   regioncount_ = 1;
68   num_cpus_ = 0;
69   num_nodes_ = 0;
70   num_cpus_per_node_ = 0;
71   error_diagnoser_ = 0;
72   err_log_callback_ = 0;
73   error_injection_ = false;
74 
75   void *pvoid = 0;
76   address_mode_ = sizeof(pvoid) * 8;
77 
78   has_clflush_ = false;
79   has_sse2_ = false;
80 
81   use_flush_page_cache_ = false;
82 }
83 
84 // OsLayer cleanup.
~OsLayer()85 OsLayer::~OsLayer() {
86   if (error_diagnoser_)
87     delete error_diagnoser_;
88 }
89 
90 // OsLayer initialization.
Initialize()91 bool OsLayer::Initialize() {
92   time_initialized_ = time(NULL);
93   // Detect asm support.
94   GetFeatures();
95 
96   if (num_cpus_ == 0) {
97     num_nodes_ = 1;
98     num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
99     num_cpus_per_node_ = num_cpus_ / num_nodes_;
100   }
101   logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
102   sat_assert(CPU_SETSIZE >= num_cpus_);
103   cpu_sets_.resize(num_nodes_);
104   cpu_sets_valid_.resize(num_nodes_);
105   // Create error diagnoser.
106   error_diagnoser_ = new ErrorDiag();
107   if (!error_diagnoser_->set_os(this))
108     return false;
109   return true;
110 }
111 
112 // Machine type detected. Can we implement all these functions correctly?
IsSupported()113 bool OsLayer::IsSupported() {
114   if (kOpenSource) {
115     // There are no explicitly supported systems in open source version.
116     return true;
117   }
118 
119   // This is the default empty implementation.
120   // SAT won't report full error information.
121   return false;
122 }
123 
AddressMode()124 int OsLayer::AddressMode() {
125   // Detect 32/64 bit binary.
126   void *pvoid = 0;
127   return sizeof(pvoid) * 8;
128 }
129 
130 // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)131 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
132   // Needs platform specific implementation.
133   return 0;
134 }
135 
136 // Returns the HD device that contains this file.
FindFileDevice(string filename)137 string OsLayer::FindFileDevice(string filename) {
138   return "hdUnknown";
139 }
140 
141 // Returns a list of locations corresponding to HD devices.
FindFileDevices()142 list<string> OsLayer::FindFileDevices() {
143   // No autodetection on unknown systems.
144   list<string> locations;
145   return locations;
146 }
147 
148 
149 // Get HW core features from cpuid instruction.
GetFeatures()150 void OsLayer::GetFeatures() {
151 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
152   // CPUID features documented at:
153   // http://www.sandpile.org/ia32/cpuid.htm
154   int ax, bx, cx, dx;
155   __asm__ __volatile__ (
156       "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1));
157   has_clflush_ = (dx >> 19) & 1;
158   has_sse2_ = (dx >> 26) & 1;
159 
160   logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
161             has_clflush_ ? "true" : "false",
162             has_sse2_ ? "true" : "false");
163 #elif defined(STRESSAPPTEST_CPU_PPC)
164   // All PPC implementations have cache flush instructions.
165   has_clflush_ = true;
166 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
167 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
168 #else
169 #warning "Unsupported CPU type: unable to determine feature set."
170 #endif
171 }
172 
173 
174 // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)175 void OsLayer::ActivateFlushPageCache(void) {
176   logprintf(9, "Log: page cache will be flushed as needed\n");
177   use_flush_page_cache_ = true;
178 }
179 
180 // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)181 bool OsLayer::FlushPageCache(void) {
182   if (!use_flush_page_cache_)
183     return true;
184 
185   // First, ask the kernel to write the cache to the disk.
186   sync();
187 
188   // Second, ask the kernel to empty the cache by writing "1" to
189   // "/proc/sys/vm/drop_caches".
190   static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
191   int dcfile = open(drop_caches_file, O_WRONLY);
192   if (dcfile < 0) {
193     int err = errno;
194     string errtxt = ErrorString(err);
195     logprintf(3, "Log: failed to open %s - err %d (%s)\n",
196               drop_caches_file, err, errtxt.c_str());
197     return false;
198   }
199 
200   ssize_t bytes_written = write(dcfile, "1", 1);
201   close(dcfile);
202 
203   if (bytes_written != 1) {
204     int err = errno;
205     string errtxt = ErrorString(err);
206     logprintf(3, "Log: failed to write %s - err %d (%s)\n",
207               drop_caches_file, err, errtxt.c_str());
208     return false;
209   }
210   return true;
211 }
212 
213 
214 // We need to flush the cacheline here.
Flush(void * vaddr)215 void OsLayer::Flush(void *vaddr) {
216   // Use the generic flush. This function is just so we can override
217   // this if we are so inclined.
218   if (has_clflush_)
219     FastFlush(vaddr);
220 }
221 
222 
223 // Run C or ASM copy as appropriate.
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)224 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
225                               unsigned int size_in_bytes,
226                               AdlerChecksum *checksum) {
227   if (has_sse2_) {
228     return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
229   } else {
230     return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
231   }
232 }
233 
234 
235 // Names the DIMM that holds an address; always "DIMM Unknown" here.
FindDimm(uint64 addr,char * buf,int len)236 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
237   char tmpbuf[256];
238   snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
239   snprintf(buf, len, "%s", tmpbuf);
240   return 0;
241 }
242 
243 
244 // Classifies addresses according to "regions"
245 // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)246 int32 OsLayer::FindRegion(uint64 addr) {
247   static bool warned = false;
248 
249   if (regionsize_ == 0) {
250     regionsize_ = totalmemsize_ / 8;
251     if (regionsize_ < 512 * kMegabyte)
252       regionsize_ = 512 * kMegabyte;
253     regioncount_ = totalmemsize_ / regionsize_;
254     if (regioncount_ < 1) regioncount_ = 1;
255   }
256 
257   int32 region_num = addr / regionsize_;
258   if (region_num >= regioncount_) {
259     if (!warned) {
260         logprintf(0, "Log: region number %d exceeds region count %d\n",
261                   region_num, regioncount_);
262         warned = true;
263     }
264     region_num = region_num % regioncount_;
265   }
266   return region_num;
267 }
268 
269 // Report which cores are associated with a given region.
FindCoreMask(int32 region)270 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
271   sat_assert(region >= 0);
272   region %= num_nodes_;
273   if (!cpu_sets_valid_[region]) {
274     CPU_ZERO(&cpu_sets_[region]);
275     for (int i = 0; i < num_cpus_per_node_; ++i) {
276       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
277     }
278     cpu_sets_valid_[region] = true;
279     logprintf(5, "Log: Region %d mask 0x%s\n",
280                  region, FindCoreMaskFormat(region).c_str());
281   }
282   return &cpu_sets_[region];
283 }
284 
285 // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)286 string OsLayer::FindCoreMaskFormat(int32 region) {
287   cpu_set_t* mask = FindCoreMask(region);
288   string format = cpuset_format(mask);
289   if (format.size() < 8)
290     format = string(8 - format.size(), '0') + format;
291   return format;
292 }
293 
294 // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)295 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
296   time_t now = time(NULL);
297   int ttf = now - time_initialized_;
298   logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
299   return true;
300 }
301 
302 // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()303 int64 OsLayer::FindHugePages() {
304   char buf[65] = "0";
305 
306   // This is a kernel interface to query the numebr of hugepages
307   // available in the system.
308   static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
309   int hpfile = open(hugepages_info_file, O_RDONLY);
310 
311   ssize_t bytes_read = read(hpfile, buf, 64);
312   close(hpfile);
313 
314   if (bytes_read <= 0) {
315     logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
316                   "read did not provide data\n");
317     return 0;
318   }
319 
320   if (bytes_read == 64) {
321     logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
322                  "is surprisingly large\n");
323     return 0;
324   }
325 
326   // Add a null termintation to be string safe.
327   buf[bytes_read] = '\0';
328   // Read the page count.
329   int64 pages = strtoull(buf, NULL, 10);  // NOLINT
330 
331   return pages;
332 }
333 
FindFreeMemSize()334 int64 OsLayer::FindFreeMemSize() {
335   int64 size = 0;
336   int64 minsize = 0;
337   if (totalmemsize_ > 0)
338     return totalmemsize_;
339 
340   int64 pages = sysconf(_SC_PHYS_PAGES);
341   int64 avpages = sysconf(_SC_AVPHYS_PAGES);
342   int64 pagesize = sysconf(_SC_PAGESIZE);
343   int64 physsize = pages * pagesize;
344   int64 avphyssize = avpages * pagesize;
345 
346   // Assume 2MB hugepages.
347   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
348 
349   if ((pages == -1) || (pagesize == -1)) {
350     logprintf(0, "Process Error: sysconf could not determine memory size.\n");
351     return 0;
352   }
353 
354   // We want to leave enough stuff for things to run.
355   // If the user specified a minimum amount of memory to expect, require that.
356   // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
357   // If less than 2GB is present use 85% of what's available.
358   // These are fairly arbitrary numbers that seem to work OK.
359   //
360   // TODO(nsanders): is there a more correct way to determine target
361   // memory size?
362   if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
363     minsize = min_hugepages_bytes_;
364   } else if (physsize < 2048LL * kMegabyte) {
365     minsize = ((pages * 85) / 100) * pagesize;
366   } else {
367     minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
368   }
369 
370   // Use hugepage sizing if available.
371   if (hugepagesize > 0) {
372     if (hugepagesize < minsize) {
373       logprintf(0, "Procedural Error: Not enough hugepages. "
374                    "%lldMB available < %lldMB required.\n",
375                 hugepagesize / kMegabyte,
376                 minsize / kMegabyte);
377       // Require the calculated minimum amount of memory.
378       size = minsize;
379     } else {
380       // Require that we get all hugepages.
381       size = hugepagesize;
382     }
383   } else {
384     // Require the calculated minimum amount of memory.
385     size = minsize;
386   }
387 
388   logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
389                "Targeting %lld MB (%lld%%)\n",
390             physsize / kMegabyte,
391             avphyssize / kMegabyte,
392             hugepagesize / kMegabyte,
393             size / kMegabyte,
394             size * 100 / physsize);
395 
396   totalmemsize_ = size;
397   return size;
398 }
399 
400 // Allocates all memory available.
AllocateAllMem()401 int64 OsLayer::AllocateAllMem() {
402   int64 length = FindFreeMemSize();
403   bool retval = AllocateTestMem(length, 0);
404   if (retval)
405     return length;
406   else
407     return 0;
408 }
409 
410 // Allocate the target memory. This may be from malloc, hugepage pool
411 // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)412 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
413   // Try hugepages first.
414   void *buf = 0;
415 
416   sat_assert(length >= 0);
417 
418   if (paddr_base)
419     logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
420               " ignore.\n", paddr_base);
421 
422   // Determine optimal memory allocation path.
423   bool prefer_hugepages = false;
424   bool prefer_posix_shm = false;
425   bool prefer_dynamic_mapping = false;
426 
427   // Are there enough hugepages?
428   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
429   // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
430   if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
431     prefer_dynamic_mapping = true;
432     prefer_posix_shm = true;
433     logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
434     logprintf(3, "Log: You may need to run "
435                  "'sudo mount -o remount,size=100\% /dev/shm.'\n");
436   } else if (hugepagesize >= length) {
437     prefer_hugepages = true;
438     logprintf(3, "Log: Prefer using hugepace allocation.\n");
439   } else {
440     logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
441   }
442 
443 #ifdef HAVE_SYS_SHM_H
444   // Allocate hugepage mapped memory.
445   if (prefer_hugepages) {
446     do { // Allow break statement.
447       int shmid;
448       void *shmaddr;
449 
450       if ((shmid = shmget(2, length,
451               SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
452         int err = errno;
453         string errtxt = ErrorString(err);
454         logprintf(3, "Log: failed to allocate shared hugepage "
455                       "object - err %d (%s)\n",
456                   err, errtxt.c_str());
457         logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
458         break;
459       }
460 
461       shmaddr = shmat(shmid, NULL, NULL);
462       if (shmaddr == reinterpret_cast<void*>(-1)) {
463         int err = errno;
464         string errtxt = ErrorString(err);
465         logprintf(0, "Log: failed to attach shared "
466                      "hugepage object - err %d (%s).\n",
467                   err, errtxt.c_str());
468         if (shmctl(shmid, IPC_RMID, NULL) < 0) {
469           int err = errno;
470           string errtxt = ErrorString(err);
471           logprintf(0, "Log: failed to remove shared "
472                        "hugepage object - err %d (%s).\n",
473                     err, errtxt.c_str());
474         }
475         break;
476       }
477       use_hugepages_ = true;
478       shmid_ = shmid;
479       buf = shmaddr;
480       logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
481                 shmid, shmaddr);
482     } while (0);
483   }
484 
485   if ((!use_hugepages_) && prefer_posix_shm) {
486     do {
487       int shm_object;
488       void *shmaddr = NULL;
489 
490       shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
491       if (shm_object < 0) {
492         int err = errno;
493         string errtxt = ErrorString(err);
494         logprintf(3, "Log: failed to allocate shared "
495                       "smallpage object - err %d (%s)\n",
496                   err, errtxt.c_str());
497         break;
498       }
499 
500       if (0 > ftruncate(shm_object, length)) {
501         int err = errno;
502         string errtxt = ErrorString(err);
503         logprintf(3, "Log: failed to ftruncate shared "
504                       "smallpage object - err %d (%s)\n",
505                   err, errtxt.c_str());
506         break;
507       }
508 
509       // 32 bit linux apps can only use ~1.4G of address space.
510       // Use dynamic mapping for allocations larger than that.
511       // Currently perf hit is ~10% for this.
512       if (prefer_dynamic_mapping) {
513         dynamic_mapped_shmem_ = true;
514       } else {
515         // Do a full mapping here otherwise.
516         shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
517                          MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
518                          shm_object, NULL);
519         if (shmaddr == reinterpret_cast<void*>(-1)) {
520           int err = errno;
521           string errtxt = ErrorString(err);
522           logprintf(0, "Log: failed to map shared "
523                        "smallpage object - err %d (%s).\n",
524                     err, errtxt.c_str());
525           break;
526         }
527       }
528 
529       use_posix_shm_ = true;
530       shmid_ = shm_object;
531       buf = shmaddr;
532       char location_message[256] = "";
533       if (dynamic_mapped_shmem_) {
534         sprintf(location_message, "mapped as needed");
535       } else {
536         sprintf(location_message, "at %p", shmaddr);
537       }
538       logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
539                 shm_object, location_message);
540     } while (0);
541     shm_unlink("/stressapptest");
542   }
543 #endif // HAVE_SYS_SHM_H
544 
545   if (!use_hugepages_ && !use_posix_shm_) {
546     // Use memalign to ensure that blocks are aligned enough for disk direct IO.
547     buf = static_cast<char*>(memalign(4096, length));
548     if (buf) {
549       logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
550     } else {
551       logprintf(0, "Process Error: memalign returned 0\n");
552       if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
553         logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
554                      "bit process. Please setup shared memory.\n");
555       }
556     }
557   }
558 
559   testmem_ = buf;
560   if (buf || dynamic_mapped_shmem_) {
561     testmemsize_ = length;
562   } else {
563     testmemsize_ = 0;
564   }
565 
566   return (buf != 0) || dynamic_mapped_shmem_;
567 }
568 
569 // Free the test memory.
FreeTestMem()570 void OsLayer::FreeTestMem() {
571   if (testmem_) {
572     if (use_hugepages_) {
573 #ifdef HAVE_SYS_SHM_H
574       shmdt(testmem_);
575       shmctl(shmid_, IPC_RMID, NULL);
576 #endif
577     } else if (use_posix_shm_) {
578       if (!dynamic_mapped_shmem_) {
579         munmap(testmem_, testmemsize_);
580       }
581       close(shmid_);
582     } else {
583       free(testmem_);
584     }
585     testmem_ = 0;
586     testmemsize_ = 0;
587   }
588 }
589 
590 
591 // Prepare the target memory. It may require mapping in, or this may be a noop.
PrepareTestMem(uint64 offset,uint64 length)592 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
593   sat_assert((offset + length) <= testmemsize_);
594   if (dynamic_mapped_shmem_) {
595     // TODO(nsanders): Check if we can support MAP_NONBLOCK,
596     // and evaluate performance hit from not using it.
597 #ifdef HAVE_MMAP64
598     void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
599                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
600                      shmid_, offset);
601 #else
602     void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
603                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
604                      shmid_, offset);
605 #endif
606     if (mapping == MAP_FAILED) {
607       string errtxt = ErrorString(errno);
608       logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
609                    "error: %s.\n",
610                 offset, length, errtxt.c_str());
611       sat_assert(0);
612     }
613     return mapping;
614   }
615 
616   return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
617 }
618 
619 // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)620 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
621   if (dynamic_mapped_shmem_) {
622     int retval = munmap(addr, length);
623     if (retval == -1) {
624       string errtxt = ErrorString(errno);
625       logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
626                    "error: %s.\n",
627                 addr, length, errtxt.c_str());
628       sat_assert(0);
629     }
630   }
631 }
632 
633 // No error polling on unknown systems.
ErrorPoll()634 int OsLayer::ErrorPoll() {
635   return 0;
636 }
637 
638 // Generally, poll for errors once per second.
ErrorWait()639 void OsLayer::ErrorWait() {
640   sat_sleep(1);
641   return;
642 }
643 
644 // Open a PCI bus-dev-func as a file and return its file descriptor.
645 // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)646 int OsLayer::PciOpen(int bus, int device, int function) {
647   char dev_file[256];
648 
649   snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
650            bus, device, function);
651 
652   int fd = open(dev_file, O_RDWR);
653   if (fd == -1) {
654     logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
655                  "function %d (errno %d).\n",
656               bus, device, function, errno);
657     return -1;
658   }
659 
660   return fd;
661 }
662 
663 
664 // Read and write functions to access PCI config.
PciRead(int fd,uint32 offset,int width)665 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
666   // Strict aliasing rules lawyers will cause data corruption
667   // on cast pointers in some gccs.
668   union {
669     uint32 l32;
670     uint16 l16;
671     uint8 l8;
672   } datacast;
673   datacast.l32 = 0;
674   uint32 size = width / 8;
675 
676   sat_assert((width == 32) || (width == 16) || (width == 8));
677   sat_assert(offset <= (256 - size));
678 
679   if (lseek(fd, offset, SEEK_SET) < 0) {
680     logprintf(0, "Process Error: Can't seek %x\n", offset);
681     return 0;
682   }
683   if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
684     logprintf(0, "Process Error: Can't read %x\n", offset);
685     return 0;
686   }
687 
688   // Extract the data.
689   switch (width) {
690     case 8:
691       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
692       return datacast.l8;
693     case 16:
694       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
695       return datacast.l16;
696     case 32:
697       return datacast.l32;
698   }
699   return 0;
700 }
701 
PciWrite(int fd,uint32 offset,uint32 value,int width)702 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
703   // Strict aliasing rules lawyers will cause data corruption
704   // on cast pointers in some gccs.
705   union {
706     uint32 l32;
707     uint16 l16;
708     uint8 l8;
709   } datacast;
710   datacast.l32 = 0;
711   uint32 size = width / 8;
712 
713   sat_assert((width == 32) || (width == 16) || (width == 8));
714   sat_assert(offset <= (256 - size));
715 
716   // Cram the data into the right alignment.
717   switch (width) {
718     case 8:
719       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
720       datacast.l8 = value;
721     case 16:
722       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
723       datacast.l16 = value;
724     case 32:
725       datacast.l32 = value;
726   }
727 
728   if (lseek(fd, offset, SEEK_SET) < 0) {
729     logprintf(0, "Process Error: Can't seek %x\n", offset);
730     return;
731   }
732   if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
733     logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
734     return;
735   }
736 
737   return;
738 }
739 
740 
741 
742 // Open dev msr.
OpenMSR(uint32 core,uint32 address)743 int OsLayer::OpenMSR(uint32 core, uint32 address) {
744   char buf[256];
745   snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
746   int fd = open(buf, O_RDWR);
747   if (fd < 0)
748     return fd;
749 
750   uint32 pos = lseek(fd, address, SEEK_SET);
751   if (pos != address) {
752     close(fd);
753     logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
754     return -1;
755   }
756 
757   return fd;
758 }
759 
ReadMSR(uint32 core,uint32 address,uint64 * data)760 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
761   int fd = OpenMSR(core, address);
762   if (fd < 0)
763     return false;
764 
765   // Read from the msr.
766   bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
767 
768   if (!res)
769     logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
770 
771   close(fd);
772 
773   return res;
774 }
775 
WriteMSR(uint32 core,uint32 address,uint64 * data)776 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
777   int fd = OpenMSR(core, address);
778   if (fd < 0)
779     return false;
780 
781   // Write to the msr
782   bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
783 
784   if (!res)
785     logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
786 
787   close(fd);
788 
789   return res;
790 }
791 
792 // Extract bits [n+len-1, n] from a 32 bit word.
793 // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)794 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
795   return (val >> n) & ((1<<len) - 1);
796 }
797 
798 // Generic CPU stress workload that would work on any CPU/Platform.
799 // Float-point array moving average calculation.
CpuStressWorkload()800 bool OsLayer::CpuStressWorkload() {
801   double float_arr[100];
802   double sum = 0;
803   unsigned int seed = 12345;
804 
805   // Initialize array with random numbers.
806   for (int i = 0; i < 100; i++) {
807 #ifdef HAVE_RAND_R
808     float_arr[i] = rand_r(&seed);
809     if (rand_r(&seed) % 2)
810       float_arr[i] *= -1.0;
811 #else
812     float_arr[i] = rand();
813     if (rand() % 2)
814       float_arr[i] *= -1.0;
815 #endif
816   }
817 
818   // Calculate moving average.
819   for (int i = 0; i < 100000000; i++) {
820     float_arr[i % 100] =
821       (float_arr[i % 100] + float_arr[(i + 1) % 100] +
822        float_arr[(i + 99) % 100]) / 3;
823     sum += float_arr[i % 100];
824   }
825 
826   // Artificial printf so the loops do not get optimized away.
827   if (sum == 0.0)
828     logprintf(12, "Log: I'm Feeling Lucky!\n");
829   return true;
830 }
831 
GetPCIDevices()832 PCIDevices OsLayer::GetPCIDevices() {
833   PCIDevices device_list;
834   DIR *dir;
835   struct dirent *buf = new struct dirent();
836   struct dirent *entry;
837   dir = opendir(kSysfsPath);
838   if (!dir)
839     logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
840   while (readdir_r(dir, buf, &entry) == 0 && entry) {
841     PCIDevice *device;
842     unsigned int dev, func;
843     // ".", ".." or a special non-device perhaps.
844     if (entry->d_name[0] == '.')
845       continue;
846 
847     device = new PCIDevice();
848     if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
849                &device->domain, &device->bus, &dev, &func) < 4) {
850       logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
851       free(device);
852       continue;
853     }
854     device->dev = dev;
855     device->func = func;
856     device->vendor_id = PCIGetValue(entry->d_name, "vendor");
857     device->device_id = PCIGetValue(entry->d_name, "device");
858     PCIGetResources(entry->d_name, device);
859     device_list.insert(device_list.end(), device);
860   }
861   closedir(dir);
862   delete buf;
863   return device_list;
864 }
865 
PCIGetValue(string name,string object)866 int OsLayer::PCIGetValue(string name, string object) {
867   int fd, len;
868   char filename[256];
869   char buf[256];
870   snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
871            name.c_str(), object.c_str());
872   fd = open(filename, O_RDONLY);
873   if (fd < 0)
874     return 0;
875   len = read(fd, buf, 256);
876   close(fd);
877   buf[len] = '\0';
878   return strtol(buf, NULL, 0);  // NOLINT
879 }
880 
PCIGetResources(string name,PCIDevice * device)881 int OsLayer::PCIGetResources(string name, PCIDevice *device) {
882   char filename[256];
883   char buf[256];
884   FILE *file;
885   int64 start;
886   int64 end;
887   int64 size;
888   int i;
889   snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
890            name.c_str(), "resource");
891   file = fopen(filename, "r");
892   if (!file) {
893     logprintf(0, "Process Error: impossible to find resource file for %s",
894               filename);
895     return errno;
896   }
897   for (i = 0; i < 6; i++) {
898     if (!fgets(buf, 256, file))
899       break;
900     sscanf(buf, "%llx %llx", &start, &end);  // NOLINT
901     size = 0;
902     if (start)
903       size = end - start + 1;
904     device->base_addr[i] = start;
905     device->size[i] = size;
906   }
907   fclose(file);
908   return 0;
909 }
910