• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2006 Google Inc. All Rights Reserved.
2  // Author: nsanders, menderico
3  
4  // Licensed under the Apache License, Version 2.0 (the "License");
5  // you may not use this file except in compliance with the License.
6  // You may obtain a copy of the License at
7  
8  //      http://www.apache.org/licenses/LICENSE-2.0
9  
10  // Unless required by applicable law or agreed to in writing, software
11  // distributed under the License is distributed on an "AS IS" BASIS,
12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  // See the License for the specific language governing permissions and
14  // limitations under the License.
15  
16  // os.cc : os and machine specific implementation
17  // This file includes an abstracted interface
18  // for linux-distro specific and HW specific
19  // interfaces.
20  
21  #include "os.h"
22  
23  #include <errno.h>
24  #include <fcntl.h>
25  #include <linux/types.h>
26  #include <malloc.h>
27  #include <stdio.h>
28  #include <stdlib.h>
29  #include <string.h>
30  #include <sys/mman.h>
31  #include <sys/ioctl.h>
32  #include <sys/time.h>
33  #include <sys/types.h>
34  #include <sys/ipc.h>
35  #ifdef HAVE_SYS_SHM_H
36  #include <sys/shm.h>
37  #endif
38  #include <unistd.h>
39  
40  #ifndef SHM_HUGETLB
41  #define SHM_HUGETLB      04000  // remove when glibc defines it
42  #endif
43  
44  #include <string>
45  #include <list>
46  
47  // This file must work with autoconf on its public version,
48  // so these includes are correct.
49  #include "sattypes.h"
50  #include "error_diag.h"
51  
52  // OsLayer initialization.
OsLayer()53  OsLayer::OsLayer() {
54    testmem_ = 0;
55    testmemsize_ = 0;
56    totalmemsize_ = 0;
57    min_hugepages_bytes_ = 0;
58    normal_mem_ = true;
59    use_hugepages_ = false;
60    use_posix_shm_ = false;
61    dynamic_mapped_shmem_ = false;
62    shmid_ = 0;
63  
64    time_initialized_ = 0;
65  
66    regionsize_ = 0;
67    regioncount_ = 1;
68    num_cpus_ = 0;
69    num_nodes_ = 0;
70    num_cpus_per_node_ = 0;
71    error_diagnoser_ = 0;
72    err_log_callback_ = 0;
73    error_injection_ = false;
74  
75    void *pvoid = 0;
76    address_mode_ = sizeof(pvoid) * 8;
77  
78    has_clflush_ = false;
79    has_sse2_ = false;
80  
81    use_flush_page_cache_ = false;
82  }
83  
84  // OsLayer cleanup.
~OsLayer()85  OsLayer::~OsLayer() {
86    if (error_diagnoser_)
87      delete error_diagnoser_;
88  }
89  
90  // OsLayer initialization.
Initialize()91  bool OsLayer::Initialize() {
92    time_initialized_ = time(NULL);
93    // Detect asm support.
94    GetFeatures();
95  
96    if (num_cpus_ == 0) {
97      num_nodes_ = 1;
98      num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
99      num_cpus_per_node_ = num_cpus_ / num_nodes_;
100    }
101    logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
102    sat_assert(CPU_SETSIZE >= num_cpus_);
103    cpu_sets_.resize(num_nodes_);
104    cpu_sets_valid_.resize(num_nodes_);
105    // Create error diagnoser.
106    error_diagnoser_ = new ErrorDiag();
107    if (!error_diagnoser_->set_os(this))
108      return false;
109    return true;
110  }
111  
112  // Machine type detected. Can we implement all these functions correctly?
IsSupported()113  bool OsLayer::IsSupported() {
114    if (kOpenSource) {
115      // There are no explicitly supported systems in open source version.
116      return true;
117    }
118  
119    // This is the default empty implementation.
120    // SAT won't report full error information.
121    return false;
122  }
123  
AddressMode()124  int OsLayer::AddressMode() {
125    // Detect 32/64 bit binary.
126    void *pvoid = 0;
127    return sizeof(pvoid) * 8;
128  }
129  
130  // Translates user virtual to physical address.
VirtualToPhysical(void * vaddr)131  uint64 OsLayer::VirtualToPhysical(void *vaddr) {
132    // Needs platform specific implementation.
133    return 0;
134  }
135  
136  // Returns the HD device that contains this file.
FindFileDevice(string filename)137  string OsLayer::FindFileDevice(string filename) {
138    return "hdUnknown";
139  }
140  
141  // Returns a list of locations corresponding to HD devices.
FindFileDevices()142  list<string> OsLayer::FindFileDevices() {
143    // No autodetection on unknown systems.
144    list<string> locations;
145    return locations;
146  }
147  
148  
149  // Get HW core features from cpuid instruction.
GetFeatures()150  void OsLayer::GetFeatures() {
151  #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
152    // CPUID features documented at:
153    // http://www.sandpile.org/ia32/cpuid.htm
154    int ax, bx, cx, dx;
155    __asm__ __volatile__ (
156        "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1));
157    has_clflush_ = (dx >> 19) & 1;
158    has_sse2_ = (dx >> 26) & 1;
159  
160    logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
161              has_clflush_ ? "true" : "false",
162              has_sse2_ ? "true" : "false");
163  #elif defined(STRESSAPPTEST_CPU_PPC)
164    // All PPC implementations have cache flush instructions.
165    has_clflush_ = true;
166  #elif defined(STRESSAPPTEST_CPU_ARMV7A)
167  #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
168  #else
169  #warning "Unsupported CPU type: unable to determine feature set."
170  #endif
171  }
172  
173  
174  // Enable FlushPageCache to be functional instead of a NOP.
ActivateFlushPageCache(void)175  void OsLayer::ActivateFlushPageCache(void) {
176    logprintf(9, "Log: page cache will be flushed as needed\n");
177    use_flush_page_cache_ = true;
178  }
179  
180  // Flush the page cache to ensure reads come from the disk.
FlushPageCache(void)181  bool OsLayer::FlushPageCache(void) {
182    if (!use_flush_page_cache_)
183      return true;
184  
185    // First, ask the kernel to write the cache to the disk.
186    sync();
187  
188    // Second, ask the kernel to empty the cache by writing "1" to
189    // "/proc/sys/vm/drop_caches".
190    static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
191    int dcfile = open(drop_caches_file, O_WRONLY);
192    if (dcfile < 0) {
193      int err = errno;
194      string errtxt = ErrorString(err);
195      logprintf(3, "Log: failed to open %s - err %d (%s)\n",
196                drop_caches_file, err, errtxt.c_str());
197      return false;
198    }
199  
200    ssize_t bytes_written = write(dcfile, "1", 1);
201    close(dcfile);
202  
203    if (bytes_written != 1) {
204      int err = errno;
205      string errtxt = ErrorString(err);
206      logprintf(3, "Log: failed to write %s - err %d (%s)\n",
207                drop_caches_file, err, errtxt.c_str());
208      return false;
209    }
210    return true;
211  }
212  
213  
214  // We need to flush the cacheline here.
Flush(void * vaddr)215  void OsLayer::Flush(void *vaddr) {
216    // Use the generic flush. This function is just so we can override
217    // this if we are so inclined.
218    if (has_clflush_)
219      FastFlush(vaddr);
220  }
221  
222  
223  // Run C or ASM copy as appropriate..
AdlerMemcpyWarm(uint64 * dstmem,uint64 * srcmem,unsigned int size_in_bytes,AdlerChecksum * checksum)224  bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
225                                unsigned int size_in_bytes,
226                                AdlerChecksum *checksum) {
227    if (has_sse2_) {
228      return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
229    } else {
230      return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
231    }
232  }
233  
234  
235  // Translate user virtual to physical address.
FindDimm(uint64 addr,char * buf,int len)236  int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
237    char tmpbuf[256];
238    snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
239    snprintf(buf, len, "%s", tmpbuf);
240    return 0;
241  }
242  
243  
244  // Classifies addresses according to "regions"
245  // This isn't really implemented meaningfully here..
FindRegion(uint64 addr)246  int32 OsLayer::FindRegion(uint64 addr) {
247    static bool warned = false;
248  
249    if (regionsize_ == 0) {
250      regionsize_ = totalmemsize_ / 8;
251      if (regionsize_ < 512 * kMegabyte)
252        regionsize_ = 512 * kMegabyte;
253      regioncount_ = totalmemsize_ / regionsize_;
254      if (regioncount_ < 1) regioncount_ = 1;
255    }
256  
257    int32 region_num = addr / regionsize_;
258    if (region_num >= regioncount_) {
259      if (!warned) {
260          logprintf(0, "Log: region number %d exceeds region count %d\n",
261                    region_num, regioncount_);
262          warned = true;
263      }
264      region_num = region_num % regioncount_;
265    }
266    return region_num;
267  }
268  
269  // Report which cores are associated with a given region.
FindCoreMask(int32 region)270  cpu_set_t *OsLayer::FindCoreMask(int32 region) {
271    sat_assert(region >= 0);
272    region %= num_nodes_;
273    if (!cpu_sets_valid_[region]) {
274      CPU_ZERO(&cpu_sets_[region]);
275      for (int i = 0; i < num_cpus_per_node_; ++i) {
276        CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
277      }
278      cpu_sets_valid_[region] = true;
279      logprintf(5, "Log: Region %d mask 0x%s\n",
280                   region, FindCoreMaskFormat(region).c_str());
281    }
282    return &cpu_sets_[region];
283  }
284  
285  // Return cores associated with a given region in hex string.
FindCoreMaskFormat(int32 region)286  string OsLayer::FindCoreMaskFormat(int32 region) {
287    cpu_set_t* mask = FindCoreMask(region);
288    string format = cpuset_format(mask);
289    if (format.size() < 8)
290      format = string(8 - format.size(), '0') + format;
291    return format;
292  }
293  
294  // Report an error in an easily parseable way.
ErrorReport(const char * part,const char * symptom,int count)295  bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
296    time_t now = time(NULL);
297    int ttf = now - time_initialized_;
298    logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
299    return true;
300  }
301  
302  // Read the number of hugepages out of the kernel interface in proc.
FindHugePages()303  int64 OsLayer::FindHugePages() {
304    char buf[65] = "0";
305  
306    // This is a kernel interface to query the numebr of hugepages
307    // available in the system.
308    static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
309    int hpfile = open(hugepages_info_file, O_RDONLY);
310  
311    ssize_t bytes_read = read(hpfile, buf, 64);
312    close(hpfile);
313  
314    if (bytes_read <= 0) {
315      logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
316                    "read did not provide data\n");
317      return 0;
318    }
319  
320    if (bytes_read == 64) {
321      logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
322                   "is surprisingly large\n");
323      return 0;
324    }
325  
326    // Add a null termintation to be string safe.
327    buf[bytes_read] = '\0';
328    // Read the page count.
329    int64 pages = strtoull(buf, NULL, 10);  // NOLINT
330  
331    return pages;
332  }
333  
FindFreeMemSize()334  int64 OsLayer::FindFreeMemSize() {
335    int64 size = 0;
336    int64 minsize = 0;
337    if (totalmemsize_ > 0)
338      return totalmemsize_;
339  
340    int64 pages = sysconf(_SC_PHYS_PAGES);
341    int64 avpages = sysconf(_SC_AVPHYS_PAGES);
342    int64 pagesize = sysconf(_SC_PAGESIZE);
343    int64 physsize = pages * pagesize;
344    int64 avphyssize = avpages * pagesize;
345  
346    // Assume 2MB hugepages.
347    int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
348  
349    if ((pages == -1) || (pagesize == -1)) {
350      logprintf(0, "Process Error: sysconf could not determine memory size.\n");
351      return 0;
352    }
353  
354    // We want to leave enough stuff for things to run.
355    // If the user specified a minimum amount of memory to expect, require that.
356    // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
357    // If less than 2GB is present use 85% of what's available.
358    // These are fairly arbitrary numbers that seem to work OK.
359    //
360    // TODO(nsanders): is there a more correct way to determine target
361    // memory size?
362    if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
363      minsize = min_hugepages_bytes_;
364    } else if (physsize < 2048LL * kMegabyte) {
365      minsize = ((pages * 85) / 100) * pagesize;
366    } else {
367      minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
368    }
369  
370    // Use hugepage sizing if available.
371    if (hugepagesize > 0) {
372      if (hugepagesize < minsize) {
373        logprintf(0, "Procedural Error: Not enough hugepages. "
374                     "%lldMB available < %lldMB required.\n",
375                  hugepagesize / kMegabyte,
376                  minsize / kMegabyte);
377        // Require the calculated minimum amount of memory.
378        size = minsize;
379      } else {
380        // Require that we get all hugepages.
381        size = hugepagesize;
382      }
383    } else {
384      // Require the calculated minimum amount of memory.
385      size = minsize;
386    }
387  
388    logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
389                 "Targeting %lld MB (%lld%%)\n",
390              physsize / kMegabyte,
391              avphyssize / kMegabyte,
392              hugepagesize / kMegabyte,
393              size / kMegabyte,
394              size * 100 / physsize);
395  
396    totalmemsize_ = size;
397    return size;
398  }
399  
400  // Allocates all memory available.
AllocateAllMem()401  int64 OsLayer::AllocateAllMem() {
402    int64 length = FindFreeMemSize();
403    bool retval = AllocateTestMem(length, 0);
404    if (retval)
405      return length;
406    else
407      return 0;
408  }
409  
410  // Allocate the target memory. This may be from malloc, hugepage pool
411  // or other platform specific sources.
AllocateTestMem(int64 length,uint64 paddr_base)412  bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
413    // Try hugepages first.
414    void *buf = 0;
415  
416    sat_assert(length >= 0);
417  
418    if (paddr_base)
419      logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
420                " ignore.\n", paddr_base);
421  
422    // Determine optimal memory allocation path.
423    bool prefer_hugepages = false;
424    bool prefer_posix_shm = false;
425    bool prefer_dynamic_mapping = false;
426  
427    // Are there enough hugepages?
428    int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
429    // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
430    if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
431      prefer_dynamic_mapping = true;
432      prefer_posix_shm = true;
433      logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
434      logprintf(3, "Log: You may need to run "
435                   "'sudo mount -o remount,size=100\% /dev/shm.'\n");
436    } else if (hugepagesize >= length) {
437      prefer_hugepages = true;
438      logprintf(3, "Log: Prefer using hugepace allocation.\n");
439    } else {
440      logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
441    }
442  
443  #ifdef HAVE_SYS_SHM_H
444    // Allocate hugepage mapped memory.
445    if (prefer_hugepages) {
446      do { // Allow break statement.
447        int shmid;
448        void *shmaddr;
449  
450        if ((shmid = shmget(2, length,
451                SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
452          int err = errno;
453          string errtxt = ErrorString(err);
454          logprintf(3, "Log: failed to allocate shared hugepage "
455                        "object - err %d (%s)\n",
456                    err, errtxt.c_str());
457          logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
458          break;
459        }
460  
461        shmaddr = shmat(shmid, NULL, NULL);
462        if (shmaddr == reinterpret_cast<void*>(-1)) {
463          int err = errno;
464          string errtxt = ErrorString(err);
465          logprintf(0, "Log: failed to attach shared "
466                       "hugepage object - err %d (%s).\n",
467                    err, errtxt.c_str());
468          if (shmctl(shmid, IPC_RMID, NULL) < 0) {
469            int err = errno;
470            string errtxt = ErrorString(err);
471            logprintf(0, "Log: failed to remove shared "
472                         "hugepage object - err %d (%s).\n",
473                      err, errtxt.c_str());
474          }
475          break;
476        }
477        use_hugepages_ = true;
478        shmid_ = shmid;
479        buf = shmaddr;
480        logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
481                  shmid, shmaddr);
482      } while (0);
483    }
484  
485    if ((!use_hugepages_) && prefer_posix_shm) {
486      do {
487        int shm_object;
488        void *shmaddr = NULL;
489  
490        shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
491        if (shm_object < 0) {
492          int err = errno;
493          string errtxt = ErrorString(err);
494          logprintf(3, "Log: failed to allocate shared "
495                        "smallpage object - err %d (%s)\n",
496                    err, errtxt.c_str());
497          break;
498        }
499  
500        if (0 > ftruncate(shm_object, length)) {
501          int err = errno;
502          string errtxt = ErrorString(err);
503          logprintf(3, "Log: failed to ftruncate shared "
504                        "smallpage object - err %d (%s)\n",
505                    err, errtxt.c_str());
506          break;
507        }
508  
509        // 32 bit linux apps can only use ~1.4G of address space.
510        // Use dynamic mapping for allocations larger than that.
511        // Currently perf hit is ~10% for this.
512        if (prefer_dynamic_mapping) {
513          dynamic_mapped_shmem_ = true;
514        } else {
515          // Do a full mapping here otherwise.
516          shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
517                           MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
518                           shm_object, NULL);
519          if (shmaddr == reinterpret_cast<void*>(-1)) {
520            int err = errno;
521            string errtxt = ErrorString(err);
522            logprintf(0, "Log: failed to map shared "
523                         "smallpage object - err %d (%s).\n",
524                      err, errtxt.c_str());
525            break;
526          }
527        }
528  
529        use_posix_shm_ = true;
530        shmid_ = shm_object;
531        buf = shmaddr;
532        char location_message[256] = "";
533        if (dynamic_mapped_shmem_) {
534          sprintf(location_message, "mapped as needed");
535        } else {
536          sprintf(location_message, "at %p", shmaddr);
537        }
538        logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
539                  shm_object, location_message);
540      } while (0);
541      shm_unlink("/stressapptest");
542    }
543  #endif // HAVE_SYS_SHM_H
544  
545    if (!use_hugepages_ && !use_posix_shm_) {
546      // Use memalign to ensure that blocks are aligned enough for disk direct IO.
547      buf = static_cast<char*>(memalign(4096, length));
548      if (buf) {
549        logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
550      } else {
551        logprintf(0, "Process Error: memalign returned 0\n");
552        if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
553          logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
554                       "bit process. Please setup shared memory.\n");
555        }
556      }
557    }
558  
559    testmem_ = buf;
560    if (buf || dynamic_mapped_shmem_) {
561      testmemsize_ = length;
562    } else {
563      testmemsize_ = 0;
564    }
565  
566    return (buf != 0) || dynamic_mapped_shmem_;
567  }
568  
569  // Free the test memory.
FreeTestMem()570  void OsLayer::FreeTestMem() {
571    if (testmem_) {
572      if (use_hugepages_) {
573  #ifdef HAVE_SYS_SHM_H
574        shmdt(testmem_);
575        shmctl(shmid_, IPC_RMID, NULL);
576  #endif
577      } else if (use_posix_shm_) {
578        if (!dynamic_mapped_shmem_) {
579          munmap(testmem_, testmemsize_);
580        }
581        close(shmid_);
582      } else {
583        free(testmem_);
584      }
585      testmem_ = 0;
586      testmemsize_ = 0;
587    }
588  }
589  
590  
591  // Prepare the target memory. It may requre mapping in, or this may be a noop.
PrepareTestMem(uint64 offset,uint64 length)592  void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
593    sat_assert((offset + length) <= testmemsize_);
594    if (dynamic_mapped_shmem_) {
595      // TODO(nsanders): Check if we can support MAP_NONBLOCK,
596      // and evaluate performance hit from not using it.
597  #ifdef HAVE_MMAP64
598      void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
599                       MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
600                       shmid_, offset);
601  #else
602      void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
603                       MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
604                       shmid_, offset);
605  #endif
606      if (mapping == MAP_FAILED) {
607        string errtxt = ErrorString(errno);
608        logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
609                     "error: %s.\n",
610                  offset, length, errtxt.c_str());
611        sat_assert(0);
612      }
613      return mapping;
614    }
615  
616    return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
617  }
618  
619  // Release the test memory resources, if any.
ReleaseTestMem(void * addr,uint64 offset,uint64 length)620  void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
621    if (dynamic_mapped_shmem_) {
622      int retval = munmap(addr, length);
623      if (retval == -1) {
624        string errtxt = ErrorString(errno);
625        logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
626                     "error: %s.\n",
627                  addr, length, errtxt.c_str());
628        sat_assert(0);
629      }
630    }
631  }
632  
633  // No error polling on unknown systems.
ErrorPoll()634  int OsLayer::ErrorPoll() {
635    return 0;
636  }
637  
638  // Generally, poll for errors once per second.
ErrorWait()639  void OsLayer::ErrorWait() {
640    sat_sleep(1);
641    return;
642  }
643  
644  // Open a PCI bus-dev-func as a file and return its file descriptor.
645  // Error is indicated by return value less than zero.
PciOpen(int bus,int device,int function)646  int OsLayer::PciOpen(int bus, int device, int function) {
647    char dev_file[256];
648  
649    snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
650             bus, device, function);
651  
652    int fd = open(dev_file, O_RDWR);
653    if (fd == -1) {
654      logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
655                   "function %d (errno %d).\n",
656                bus, device, function, errno);
657      return -1;
658    }
659  
660    return fd;
661  }
662  
663  
664  // Read and write functions to access PCI config.
PciRead(int fd,uint32 offset,int width)665  uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
666    // Strict aliasing rules lawyers will cause data corruption
667    // on cast pointers in some gccs.
668    union {
669      uint32 l32;
670      uint16 l16;
671      uint8 l8;
672    } datacast;
673    datacast.l32 = 0;
674    uint32 size = width / 8;
675  
676    sat_assert((width == 32) || (width == 16) || (width == 8));
677    sat_assert(offset <= (256 - size));
678  
679    if (lseek(fd, offset, SEEK_SET) < 0) {
680      logprintf(0, "Process Error: Can't seek %x\n", offset);
681      return 0;
682    }
683    if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
684      logprintf(0, "Process Error: Can't read %x\n", offset);
685      return 0;
686    }
687  
688    // Extract the data.
689    switch (width) {
690      case 8:
691        sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
692        return datacast.l8;
693      case 16:
694        sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
695        return datacast.l16;
696      case 32:
697        return datacast.l32;
698    }
699    return 0;
700  }
701  
PciWrite(int fd,uint32 offset,uint32 value,int width)702  void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
703    // Strict aliasing rules lawyers will cause data corruption
704    // on cast pointers in some gccs.
705    union {
706      uint32 l32;
707      uint16 l16;
708      uint8 l8;
709    } datacast;
710    datacast.l32 = 0;
711    uint32 size = width / 8;
712  
713    sat_assert((width == 32) || (width == 16) || (width == 8));
714    sat_assert(offset <= (256 - size));
715  
716    // Cram the data into the right alignment.
717    switch (width) {
718      case 8:
719        sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
720        datacast.l8 = value;
721      case 16:
722        sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
723        datacast.l16 = value;
724      case 32:
725        datacast.l32 = value;
726    }
727  
728    if (lseek(fd, offset, SEEK_SET) < 0) {
729      logprintf(0, "Process Error: Can't seek %x\n", offset);
730      return;
731    }
732    if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
733      logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
734      return;
735    }
736  
737    return;
738  }
739  
740  
741  
742  // Open dev msr.
OpenMSR(uint32 core,uint32 address)743  int OsLayer::OpenMSR(uint32 core, uint32 address) {
744    char buf[256];
745    snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
746    int fd = open(buf, O_RDWR);
747    if (fd < 0)
748      return fd;
749  
750    uint32 pos = lseek(fd, address, SEEK_SET);
751    if (pos != address) {
752      close(fd);
753      logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
754      return -1;
755    }
756  
757    return fd;
758  }
759  
ReadMSR(uint32 core,uint32 address,uint64 * data)760  bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
761    int fd = OpenMSR(core, address);
762    if (fd < 0)
763      return false;
764  
765    // Read from the msr.
766    bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
767  
768    if (!res)
769      logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
770  
771    close(fd);
772  
773    return res;
774  }
775  
WriteMSR(uint32 core,uint32 address,uint64 * data)776  bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
777    int fd = OpenMSR(core, address);
778    if (fd < 0)
779      return false;
780  
781    // Write to the msr
782    bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
783  
784    if (!res)
785      logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
786  
787    close(fd);
788  
789    return res;
790  }
791  
792  // Extract bits [n+len-1, n] from a 32 bit word.
793  // so GetBitField(0x0f00, 8, 4) == 0xf.
GetBitField(uint32 val,uint32 n,uint32 len)794  uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
795    return (val >> n) & ((1<<len) - 1);
796  }
797  
798  // Generic CPU stress workload that would work on any CPU/Platform.
799  // Float-point array moving average calculation.
CpuStressWorkload()800  bool OsLayer::CpuStressWorkload() {
801    double float_arr[100];
802    double sum = 0;
803    unsigned int seed = 12345;
804  
805    // Initialize array with random numbers.
806    for (int i = 0; i < 100; i++) {
807  #ifdef HAVE_RAND_R
808      float_arr[i] = rand_r(&seed);
809      if (rand_r(&seed) % 2)
810        float_arr[i] *= -1.0;
811  #else
812      float_arr[i] = rand();
813      if (rand() % 2)
814        float_arr[i] *= -1.0;
815  #endif
816    }
817  
818    // Calculate moving average.
819    for (int i = 0; i < 100000000; i++) {
820      float_arr[i % 100] =
821        (float_arr[i % 100] + float_arr[(i + 1) % 100] +
822         float_arr[(i + 99) % 100]) / 3;
823      sum += float_arr[i % 100];
824    }
825  
826    // Artificial printf so the loops do not get optimized away.
827    if (sum == 0.0)
828      logprintf(12, "Log: I'm Feeling Lucky!\n");
829    return true;
830  }
831  
GetPCIDevices()832  PCIDevices OsLayer::GetPCIDevices() {
833    PCIDevices device_list;
834    DIR *dir;
835    struct dirent *buf = new struct dirent();
836    struct dirent *entry;
837    dir = opendir(kSysfsPath);
838    if (!dir)
839      logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
840    while (readdir_r(dir, buf, &entry) == 0 && entry) {
841      PCIDevice *device;
842      unsigned int dev, func;
843      // ".", ".." or a special non-device perhaps.
844      if (entry->d_name[0] == '.')
845        continue;
846  
847      device = new PCIDevice();
848      if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
849                 &device->domain, &device->bus, &dev, &func) < 4) {
850        logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
851        free(device);
852        continue;
853      }
854      device->dev = dev;
855      device->func = func;
856      device->vendor_id = PCIGetValue(entry->d_name, "vendor");
857      device->device_id = PCIGetValue(entry->d_name, "device");
858      PCIGetResources(entry->d_name, device);
859      device_list.insert(device_list.end(), device);
860    }
861    closedir(dir);
862    delete buf;
863    return device_list;
864  }
865  
PCIGetValue(string name,string object)866  int OsLayer::PCIGetValue(string name, string object) {
867    int fd, len;
868    char filename[256];
869    char buf[256];
870    snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
871             name.c_str(), object.c_str());
872    fd = open(filename, O_RDONLY);
873    if (fd < 0)
874      return 0;
875    len = read(fd, buf, 256);
876    close(fd);
877    buf[len] = '\0';
878    return strtol(buf, NULL, 0);  // NOLINT
879  }
880  
PCIGetResources(string name,PCIDevice * device)881  int OsLayer::PCIGetResources(string name, PCIDevice *device) {
882    char filename[256];
883    char buf[256];
884    FILE *file;
885    int64 start;
886    int64 end;
887    int64 size;
888    int i;
889    snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
890             name.c_str(), "resource");
891    file = fopen(filename, "r");
892    if (!file) {
893      logprintf(0, "Process Error: impossible to find resource file for %s",
894                filename);
895      return errno;
896    }
897    for (i = 0; i < 6; i++) {
898      if (!fgets(buf, 256, file))
899        break;
900      sscanf(buf, "%llx %llx", &start, &end);  // NOLINT
901      size = 0;
902      if (start)
903        size = end - start + 1;
904      device->base_addr[i] = start;
905      device->size[i] = size;
906    }
907    fclose(file);
908    return 0;
909  }
910