// Copyright 2006 Google Inc. All Rights Reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// worker.cc : individual tasks that can be run in combination to
// stress the system

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/select.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/times.h>

// These are necessary, but on by default
// #define __USE_GNU
// #define __USE_LARGEFILE64
#include <fcntl.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <linux/unistd.h>  // for gettid

// For size of block device
#include <sys/ioctl.h>
#include <linux/fs.h>
// For asynchronous I/O
#ifdef HAVE_LIBAIO_H
#include <libaio.h>
#endif

#include <sys/syscall.h>

#include <set>
#include <string>

// This file must work with autoconf on its public version,
// so these includes are correct.
#include "error_diag.h"  // NOLINT
#include "os.h"  // NOLINT
#include "pattern.h"  // NOLINT
#include "queue.h"  // NOLINT
#include "sat.h"  // NOLINT
#include "sattypes.h"  // NOLINT
#include "worker.h"  // NOLINT

// Syscalls
// Why, Ubuntu, do you hate gettid so badly?
#if !defined(__NR_gettid)
#define __NR_gettid 224
#endif

#define gettid() syscall(__NR_gettid)
#if !defined(CPU_SETSIZE)
_syscall3(int, sched_getaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
_syscall3(int, sched_setaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
#endif
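// (Both fallbacks above assume an old 32-bit x86 environment: 224 is the
// i386 syscall number for gettid, and the _syscall3 declarations predate
// glibc's cpu_set_t affinity wrappers.)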

namespace {
  // Work around the sad fact that there are two (gnu, xsi) incompatible
  // versions of strerror_r floating around Google. Awesome.
  bool sat_strerror(int err, char *buf, int len) {
    buf[0] = 0;
    char *errmsg = reinterpret_cast<char*>(strerror_r(err, buf, len));
    int retval = reinterpret_cast<int64>(errmsg);
    if (retval == 0)
      return true;
    if (retval == -1)
      return false;
    if (errmsg != buf) {
      strncpy(buf, errmsg, len);
      buf[len - 1] = 0;
    }
    return true;
  }


  inline uint64 addr_to_tag(void *address) {
    return reinterpret_cast<uint64>(address);
  }
}  // namespace
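
// Note on tagging: a word's "tag" is simply its own virtual address (see
// addr_to_tag above). In tag_mode_, every 8th word of a page carries its
// address instead of pattern data, so a miscompare can show where the bad
// data was supposed to live, not just that it was wrong.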

#if !defined(O_DIRECT)
// Sometimes this isn't available.
// Disregard if it's not defined.
#define O_DIRECT 0
#endif

// A struct to hold captured errors, for later reporting.
struct ErrorRecord {
  uint64 actual;  // This is the actual value read.
  uint64 reread;  // This is the actual value, reread.
  uint64 expected;  // This is what it should have been.
  uint64 *vaddr;  // This is where it was (or wasn't).
  char *vbyteaddr;  // This is byte specific where the data was (or wasn't).
  uint64 paddr;  // This is the bus address, if available.
  uint64 *tagvaddr;  // This holds the tag value if this data was tagged.
  uint64 tagpaddr;  // This holds the physical address corresponding to the tag.
};

// This is a helper function to create new threads with pthreads.
static void *ThreadSpawnerGeneric(void *ptr) {
  WorkerThread *worker = static_cast<WorkerThread*>(ptr);
  worker->StartRoutine();
  return NULL;
}

void WorkerStatus::Initialize() {
  sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
  sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
#ifdef HAVE_PTHREAD_BARRIERS
  sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL,
                                       num_workers_ + 1));
#endif
}

void WorkerStatus::Destroy() {
  sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_));
  sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_));
#ifdef HAVE_PTHREAD_BARRIERS
  sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
#endif
}

void WorkerStatus::PauseWorkers() {
  if (SetStatus(PAUSE) != PAUSE)
    WaitOnPauseBarrier();
}

void WorkerStatus::ResumeWorkers() {
  if (SetStatus(RUN) == PAUSE)
    WaitOnPauseBarrier();
}

void WorkerStatus::StopWorkers() {
  if (SetStatus(STOP) == PAUSE)
    WaitOnPauseBarrier();
}
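
// SetStatus() returns the previous status, so the three transitions above
// wait on the pause barrier only when they actually enter or leave the PAUSE
// state. The matching waits are in ContinueRunning() and RemoveSelf() below.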

bool WorkerStatus::ContinueRunning(bool *paused) {
  // This loop is an optimization. We use it to immediately re-check the
  // status after resuming from a pause, instead of returning and waiting for
  // the next call to this function.
  if (paused) {
    *paused = false;
  }
  for (;;) {
    switch (GetStatus()) {
      case RUN:
        return true;
      case PAUSE:
        // Wait for the other workers to call this function so that
        // PauseWorkers() can return.
        WaitOnPauseBarrier();
        // Wait for ResumeWorkers() to be called.
        WaitOnPauseBarrier();
        // Indicate that a pause occurred.
        if (paused) {
          *paused = true;
        }
        break;
      case STOP:
        return false;
    }
  }
}

bool WorkerStatus::ContinueRunningNoPause() {
  return (GetStatus() != STOP);
}

void WorkerStatus::RemoveSelf() {
  // Acquire a read lock on status_rwlock_ while (status_ != PAUSE).
  for (;;) {
    AcquireStatusReadLock();
    if (status_ != PAUSE)
      break;
    // We need to obey PauseWorkers() just like ContinueRunning() would, so
    // that the other threads won't wait on pause_barrier_ forever.
    ReleaseStatusLock();
    // Wait for the other workers to call this function so that PauseWorkers()
    // can return.
    WaitOnPauseBarrier();
    // Wait for ResumeWorkers() to be called.
    WaitOnPauseBarrier();
  }

  // This lock would be unnecessary if we held a write lock instead of a read
  // lock on status_rwlock_, but that would also force all threads calling
  // ContinueRunning() to wait on this one. Using a separate lock avoids that.
  AcquireNumWorkersLock();
  // Decrement num_workers_ and reinitialize pause_barrier_, which we know
  // isn't in use because (status != PAUSE).
#ifdef HAVE_PTHREAD_BARRIERS
  sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
  sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_));
#endif
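  // Note: num_workers_ hasn't been decremented yet, so the new barrier count
  // equals the remaining workers (num_workers_ - 1) plus the master thread
  // waiting in PauseWorkers()/ResumeWorkers(), matching the num_workers_ + 1
  // participants set up in Initialize().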
  --num_workers_;
  ReleaseNumWorkersLock();

  // Release status_rwlock_.
  ReleaseStatusLock();
}

// Parent thread class.
WorkerThread::WorkerThread() {
  status_ = false;
  pages_copied_ = 0;
  errorcount_ = 0;
  runduration_usec_ = 1;
  priority_ = Normal;
  worker_status_ = NULL;
  thread_spawner_ = &ThreadSpawnerGeneric;
  tag_mode_ = false;
}

WorkerThread::~WorkerThread() {}

// Constructors. Just init some default values.
FillThread::FillThread() {
  num_pages_to_fill_ = 0;
}

// Initialize file name to empty.
FileThread::FileThread() {
  filename_ = "";
  devicename_ = "";
  pass_ = 0;
  page_io_ = true;
  crc_page_ = -1;
  local_page_ = NULL;
}

// If file thread used a bounce buffer in memory, account for the extra
// copy in the memory bandwidth calculation.
float FileThread::GetMemoryCopiedData() {
  if (!os_->normal_mem())
    return GetCopiedData();
  else
    return 0;
}

// Initialize target hostname to be invalid.
NetworkThread::NetworkThread() {
  snprintf(ipaddr_, sizeof(ipaddr_), "Unknown");
  sock_ = 0;
}

// Nothing extra to initialize.
NetworkSlaveThread::NetworkSlaveThread() {
}

// Nothing extra to initialize.
NetworkListenThread::NetworkListenThread() {
}

// Init member variables.
void WorkerThread::InitThread(int thread_num_init,
                              class Sat *sat_init,
                              class OsLayer *os_init,
                              class PatternList *patternlist_init,
                              WorkerStatus *worker_status) {
  sat_assert(worker_status);
  worker_status->AddWorkers(1);

  thread_num_ = thread_num_init;
  sat_ = sat_init;
  os_ = os_init;
  patternlist_ = patternlist_init;
  worker_status_ = worker_status;

  AvailableCpus(&cpu_mask_);
  tag_ = 0xffffffff;

  tag_mode_ = sat_->tag_mode();
}


// Use pthreads to prioritize a system thread.
bool WorkerThread::InitPriority() {
  // This doesn't affect performance that much, and may not be too safe.

  bool ret = BindToCpus(&cpu_mask_);
  if (!ret)
    logprintf(11, "Log: Bind to %s failed.\n",
              cpuset_format(&cpu_mask_).c_str());

  logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n",
            thread_num_, sched_getcpu(),
            CurrentCpusFormat().c_str(),
            cpuset_format(&cpu_mask_).c_str());
#if 0
  if (priority_ == High) {
    sched_param param;
    param.sched_priority = 1;
    // Set the priority; others are unchanged.
    logprintf(0, "Log: Changing priority to SCHED_FIFO %d\n",
              param.sched_priority);
    if (sched_setscheduler(0, SCHED_FIFO, &param)) {
      char buf[256];
      sat_strerror(errno, buf, sizeof(buf));
      logprintf(0, "Process Error: sched_setscheduler "
                "failed - error %d %s\n",
                errno, buf);
    }
  }
#endif
  return true;
}

// Use pthreads to create a system thread.
int WorkerThread::SpawnThread() {
  // Create the new thread.
  int result = pthread_create(&thread_, NULL, thread_spawner_, this);
  if (result) {
    char buf[256];
    sat_strerror(result, buf, sizeof(buf));
    logprintf(0, "Process Error: pthread_create "
              "failed - error %d %s\n", result,
              buf);
    status_ = false;
    return false;
  }

  // 0 is pthreads success.
  return true;
}

// Kill the worker thread with SIGINT.
bool WorkerThread::KillThread() {
  return (pthread_kill(thread_, SIGINT) == 0);
}

// Block until thread has exited.
bool WorkerThread::JoinThread() {
  int result = pthread_join(thread_, NULL);

  if (result) {
    logprintf(0, "Process Error: pthread_join failed - error %d\n", result);
    status_ = false;
  }

  // 0 is pthreads success.
  return (!result);
}


void WorkerThread::StartRoutine() {
  InitPriority();
  StartThreadTimer();
  Work();
  StopThreadTimer();
  worker_status_->RemoveSelf();
}


// Thread work loop. Execute until marked finished.
bool WorkerThread::Work() {
  do {
    logprintf(9, "Log: ...\n");
    // Sleep for 1 second.
    sat_sleep(1);
  } while (IsReadyToRun());

  return false;
}


// Returns CPU mask of CPUs available to this process.
// Conceptually, each bit represents a logical CPU, i.e.:
//   mask = 3 (11b):    cpu0, 1
//   mask = 13 (1101b): cpu0, 2, 3
bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
#ifdef HAVE_SCHED_GETAFFINITY
  return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0;
#else
  return false;
#endif
}


// Returns CPU mask of CPUs this thread is bound to.
// Conceptually, each bit represents a logical CPU, i.e.:
//   mask = 3 (11b):    cpu0, 1
//   mask = 13 (1101b): cpu0, 2, 3
bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
#ifdef HAVE_SCHED_GETAFFINITY
  return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0;
#else
  return false;
#endif
}


// Bind worker thread to specified CPU(s).
// Args:
//   thread_mask: cpu_set_t representing CPUs, i.e.:
//     mask = 1 (01b):    cpu0
//     mask = 3 (11b):    cpu0, 1
//     mask = 13 (1101b): cpu0, 2, 3
//
// Returns true on success, false otherwise.
bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) {
  cpu_set_t process_mask;
  AvailableCpus(&process_mask);
  if (cpuset_isequal(thread_mask, &process_mask))
    return true;

  logprintf(11, "Log: available CPU mask - %s\n",
            cpuset_format(&process_mask).c_str());
  if (!cpuset_issubset(thread_mask, &process_mask)) {
    // Invalid cpu_mask, i.e. a CPU not allocated to this process, or one
    // that doesn't exist.
    logprintf(0, "Log: requested CPUs %s not a subset of available %s\n",
              cpuset_format(thread_mask).c_str(),
              cpuset_format(&process_mask).c_str());
    return false;
  }
#ifdef HAVE_SCHED_GETAFFINITY
  return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0);
#else
  return false;
#endif
}


// A worker thread can yield itself to give up CPU until it's scheduled again.
// Returns true on success, false on error.
bool WorkerThread::YieldSelf() {
  return (sched_yield() == 0);
}


// Fill this page with its pattern.
bool WorkerThread::FillPage(struct page_entry *pe) {
  // Error check arguments.
  if (pe == 0) {
    logprintf(0, "Process Error: Fill Page entry null\n");
    return false;
  }

  // Each 64-bit word is built from two 32-bit pattern words: pattern(2i) in
  // the low half and pattern(2i + 1) in the high half. The pattern class
  // wraps indexes internally (pattern sizes are powers of 2).
  uint64 *memwords = static_cast<uint64*>(pe->addr);
  int length = sat_->page_length();

  if (tag_mode_) {
    // Select tag or data as appropriate: every 8th word carries its own
    // address as a tag instead of pattern data.
    for (int i = 0; i < length / wordsize_; i++) {
      datacast_t data;

      if ((i & 0x7) == 0) {
        data.l64 = addr_to_tag(&memwords[i]);
      } else {
        data.l32.l = pe->pattern->pattern(i << 1);
        data.l32.h = pe->pattern->pattern((i << 1) + 1);
      }
      memwords[i] = data.l64;
    }
  } else {
    // Just fill in untagged data directly.
    for (int i = 0; i < length / wordsize_; i++) {
      datacast_t data;

      data.l32.l = pe->pattern->pattern(i << 1);
      data.l32.h = pe->pattern->pattern((i << 1) + 1);
      memwords[i] = data.l64;
    }
  }

  return true;
}


// Tell the thread how many pages to fill.
void FillThread::SetFillPages(int64 num_pages_to_fill_init) {
  num_pages_to_fill_ = num_pages_to_fill_init;
}

// Fill this page with a random pattern.
bool FillThread::FillPageRandom(struct page_entry *pe) {
  // Error check arguments.
  if (pe == 0) {
    logprintf(0, "Process Error: Fill Page entry null\n");
    return false;
  }
  if ((patternlist_ == 0) || (patternlist_->Size() == 0)) {
    logprintf(0, "Process Error: No data patterns available\n");
    return false;
  }

  // Choose a random pattern for this block.
  pe->pattern = patternlist_->GetRandomPattern();
  if (pe->pattern == 0) {
    logprintf(0, "Process Error: Null data pattern\n");
    return false;
  }

  // Actually fill the page.
  return FillPage(pe);
}


// Memory fill work loop. Execute until the allotted pages are filled.
bool FillThread::Work() {
  bool result = true;

  logprintf(9, "Log: Starting fill thread %d\n", thread_num_);

  // We want to fill num_pages_to_fill pages, and
  // stop when we've filled that many.
  // We also want to bail out on an early break.
  struct page_entry pe;
  int64 loops = 0;
  while (IsReadyToRun() && (loops < num_pages_to_fill_)) {
    result = result && sat_->GetEmpty(&pe);
    if (!result) {
      logprintf(0, "Process Error: fill_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    // Fill the page with pattern
    result = result && FillPageRandom(&pe);
    if (!result) break;

    // Put the page back on the queue.
    result = result && sat_->PutValid(&pe);
    if (!result) {
      logprintf(0, "Process Error: fill_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  // Fill in thread status.
  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Print error information about a data miscompare.
void WorkerThread::ProcessError(struct ErrorRecord *error,
                                int priority,
                                const char *message) {
  char dimm_string[256] = "";

  int core_id = sched_getcpu();

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  char *good = reinterpret_cast<char*>(&(error->expected));
  char *bad = reinterpret_cast<char*>(&(error->actual));

  sat_assert(error->expected != error->actual);
  unsigned int offset = 0;
  for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
    if (good[offset] != bad[offset])
      break;
  }

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;

  // Find physical address if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));

  // Report parseable error.
  if (priority < 5) {
    // Run miscompare error through diagnoser for logging and reporting.
    os_->error_diagnoser_->AddMiscompareError(dimm_string,
                                              reinterpret_cast<uint64>
                                              (error->vaddr), 1);

    logprintf(priority,
              "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
              "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
              message,
              core_id,
              CurrentCpusFormat().c_str(),
              error->vaddr,
              error->paddr,
              dimm_string,
              error->actual,
              error->reread,
              error->expected);
  }


  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}



// Print error information about a data miscompare.
void FileThread::ProcessError(struct ErrorRecord *error,
                              int priority,
                              const char *message) {
  char dimm_string[256] = "";

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  char *good = reinterpret_cast<char*>(&(error->expected));
  char *bad = reinterpret_cast<char*>(&(error->actual));

  sat_assert(error->expected != error->actual);
  unsigned int offset = 0;
  for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
    if (good[offset] != bad[offset])
      break;
  }

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;

  // Find physical address if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));

  // If crc_page_ is valid, i.e. we are checking content read back from file,
  // track src/dst memory addresses. Otherwise categorize it as a general
  // memory miscompare, as done for CRC checking everywhere else.
  if (crc_page_ != -1) {
    int miscompare_byteoffset = static_cast<char*>(error->vbyteaddr) -
                                static_cast<char*>(page_recs_[crc_page_].dst);
    os_->error_diagnoser_->AddHDDMiscompareError(devicename_,
                                                 crc_page_,
                                                 miscompare_byteoffset,
                                                 page_recs_[crc_page_].src,
                                                 page_recs_[crc_page_].dst);
  } else {
    os_->error_diagnoser_->AddMiscompareError(dimm_string,
                                              reinterpret_cast<uint64>
                                              (error->vaddr), 1);
  }

  logprintf(priority,
            "%s: miscompare on %s at %p(0x%llx:%s): read:0x%016llx, "
            "reread:0x%016llx expected:0x%016llx\n",
            message,
            devicename_.c_str(),
            error->vaddr,
            error->paddr,
            dimm_string,
            error->actual,
            error->reread,
            error->expected);

  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}


// Do a word by word result check of a region.
// Print errors on mismatches.
int WorkerThread::CheckRegion(void *addr,
                              class Pattern *pattern,
                              int64 length,
                              int offset,
                              int64 pattern_offset) {
  uint64 *memblock = static_cast<uint64*>(addr);
  const int kErrorLimit = 128;
  int errors = 0;
  int overflowerrors = 0;  // Count of overflowed errors.
  bool page_error = false;
  string errormessage("Hardware Error");
  struct ErrorRecord
    recorded[kErrorLimit];  // Queued errors for later printing.

  // For each word in the data region.
  for (int i = 0; i < length / wordsize_; i++) {
    uint64 actual = memblock[i];
    uint64 expected;

    // Determine the value that should be there.
    datacast_t data;
    int index = 2 * i + pattern_offset;
    data.l32.l = pattern->pattern(index);
    data.l32.h = pattern->pattern(index + 1);
    expected = data.l64;
    // Check tags if necessary.
    if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
      expected = addr_to_tag(&memblock[i]);
    }

    // If the value is incorrect, save an error record for later printing.
    if (actual != expected) {
      if (errors < kErrorLimit) {
        recorded[errors].actual = actual;
        recorded[errors].expected = expected;
        recorded[errors].vaddr = &memblock[i];
        errors++;
      } else {
        page_error = true;
        // If we have overflowed the error queue, just print the errors now.
        logprintf(10, "Log: Error record overflow, too many miscompares!\n");
        errormessage = "Page Error";
        break;
      }
    }
  }

  // Find if this is a whole block corruption.
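  // The state machine below walks the block against each alternate pattern:
  // kGood while words still match our own pattern, kBad over a contiguous run
  // matching the alternate pattern, kGoodAgain once our pattern resumes, and
  // kNoMatch to reject the candidate. Ending in kBad or kGoodAgain means one
  // contiguous span of this block holds another pattern's data.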
  if (page_error && !tag_mode_) {
    int patsize = patternlist_->Size();
    for (int pat = 0; pat < patsize; pat++) {
      class Pattern *altpattern = patternlist_->GetPattern(pat);
      const int kGood = 0;
      const int kBad = 1;
      const int kGoodAgain = 2;
      const int kNoMatch = 3;
      int state = kGood;
      unsigned int badstart = 0;
      unsigned int badend = 0;

      // Don't match against ourself!
      if (pattern == altpattern)
        continue;

      for (int i = 0; i < length / wordsize_; i++) {
        uint64 actual = memblock[i];
        datacast_t expected;
        datacast_t possible;

        // Determine the value that should be there, and the alternate
        // pattern's candidate value.
        int index = 2 * i + pattern_offset;

        expected.l32.l = pattern->pattern(index);
        expected.l32.h = pattern->pattern(index + 1);

        possible.l32.l = altpattern->pattern(index);
        possible.l32.h = altpattern->pattern(index + 1);

        if (state == kGood) {
          if (actual == expected.l64) {
            continue;
          } else if (actual == possible.l64) {
            badstart = i;
            badend = i;
            state = kBad;
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        } else if (state == kBad) {
          if (actual == possible.l64) {
            badend = i;
            continue;
          } else if (actual == expected.l64) {
            state = kGoodAgain;
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        } else if (state == kGoodAgain) {
          if (actual == expected.l64) {
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        }
      }

      if ((state == kGoodAgain) || (state == kBad)) {
        unsigned int blockerrors = badend - badstart + 1;
        errormessage = "Block Error";
        // It's okay for the 1st entry to be corrected multiple times,
        // it will simply be reported twice. Once here and once below
        // when processing the error queue.
        ProcessError(&recorded[0], 0, errormessage.c_str());
        logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
                  "%d bytes from offset 0x%x to 0x%x\n",
                  &memblock[badstart],
                  altpattern->name(), pattern->name(),
                  blockerrors * wordsize_,
                  offset + badstart * wordsize_,
                  offset + badend * wordsize_);
      }
    }
  }


  // Process error queue after all errors have been recorded.
  for (int err = 0; err < errors; err++) {
    int priority = 5;
    if (errorcount_ + err < 30)
      priority = 0;  // Bump up the priority for the first few errors.
    ProcessError(&recorded[err], priority, errormessage.c_str());
  }

  if (page_error) {
    // For each word in the data region.
    for (int i = 0; i < length / wordsize_; i++) {
      uint64 actual = memblock[i];
      uint64 expected;
      datacast_t data;
      // Determine the value that should be there.
      int index = 2 * i + pattern_offset;

      data.l32.l = pattern->pattern(index);
      data.l32.h = pattern->pattern(index + 1);
      expected = data.l64;

      // Check tags if necessary.
      if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
        expected = addr_to_tag(&memblock[i]);
      }

      // The error queue overflowed, so process each remaining error
      // immediately instead of recording it.
      if (actual != expected) {
        struct ErrorRecord er;
        er.actual = actual;
        er.expected = expected;
        er.vaddr = &memblock[i];

        // Do the error printout. This will take a long time and
        // likely change the machine state.
        ProcessError(&er, 12, errormessage.c_str());
        overflowerrors++;
      }
    }
  }

  // Keep track of observed errors.
  errorcount_ += errors + overflowerrors;
  return errors + overflowerrors;
}

float WorkerThread::GetCopiedData() {
  return pages_copied_ * sat_->page_length() / kMegabyte;
}

// Calculate the CRC of a region.
// Fall back to a full result check if the CRC mismatches.
int WorkerThread::CrcCheckPage(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int errors = 0;

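  // The expected checksum is stored with the pattern, so verifying each 4 KB
  // block is a single compare; only a mismatch falls back to the slow
  // word-by-word CheckRegion() pass below.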
  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();
  uint64 *memblock = static_cast<uint64*>(srcpe->addr);
  int blocks = sat_->page_length() / blocksize;
  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *memslice = memblock + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrCrcC(memslice, blocksize, &crc, srcpe);
    } else {
      CalculateAdlerChecksum(memslice, blocksize, &crc);
    }

    // If the CRC does not match, we'd better look closer.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcCheckPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n",
                crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(memslice,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcCheckPage CRC mismatch %s != %s, "
                  "but no miscompares found.\n",
                  crc.ToHexString().c_str(),
                  expectedcrc->ToHexString().c_str());
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-sized transfers we should
  // never hit this.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *memslice = memblock + blocks * blockwords;
    errors += CheckRegion(memslice,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
  }
  return errors;
}


// Print error information about a data miscompare.
void WorkerThread::ProcessTagError(struct ErrorRecord *error,
                                   int priority,
                                   const char *message) {
  char dimm_string[256] = "";
  char tag_dimm_string[256] = "";
  bool read_error = false;

  int core_id = sched_getcpu();

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  // Distinguish read and write errors.
  if (error->actual != error->reread) {
    read_error = true;
  }

  sat_assert(error->expected != error->actual);

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr);

  // Find physical addresses if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
  error->tagpaddr = os_->VirtualToPhysical(error->tagvaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));
  // Pretty print DIMM mapping for the tag address too.
  os_->FindDimm(error->tagpaddr, tag_dimm_string, sizeof(tag_dimm_string));

  // Report parseable error.
  if (priority < 5) {
    logprintf(priority,
              "%s: Tag from %p(0x%llx:%s) (%s) "
              "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
              "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
              message,
              error->tagvaddr, error->tagpaddr,
              tag_dimm_string,
              read_error ? "read error" : "write error",
              core_id,
              CurrentCpusFormat().c_str(),
              error->vaddr,
              error->paddr,
              dimm_string,
              error->actual,
              error->reread,
              error->expected);
  }

  errorcount_ += 1;

  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}


// Print out and log a tag error.
bool WorkerThread::ReportTagError(
    uint64 *mem64,
    uint64 actual,
    uint64 tag) {
  struct ErrorRecord er;
  er.actual = actual;
  er.expected = tag;
  er.vaddr = mem64;

  // Generate vaddr from tag.
  er.tagvaddr = reinterpret_cast<uint64*>(actual);

  ProcessTagError(&er, 0, "Hardware Error");
  return true;
}

// C implementation of Adler memory copy, with memory tagging.
bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64,
                                    uint64 *srcmem64,
                                    unsigned int size_in_bytes,
                                    AdlerChecksum *checksum,
                                    struct page_entry *pe) {
  // Use this data wrapper to access memory with 64bit read/write.
  datacast_t data;
  datacast_t dstdata;
  unsigned int count = size_in_bytes / sizeof(data);

  if (count > ((1U) << 19)) {
    // Region is too large: count must be at most 512K (2^19) 64-bit words.
    return false;
  }

  uint64 a1 = 1;
  uint64 a2 = 1;
  uint64 b1 = 0;
  uint64 b2 = 0;
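
  // Two independent Adler lanes: (a1, b1) accumulate even-indexed words and
  // (a2, b2) odd-indexed words. Splitting the dependency chain lets the loop
  // below handle two words per iteration with some instruction-level
  // parallelism; checksum->Set(a1, a2, b1, b2) records all four sums.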

  class Pattern *pattern = pe->pattern;

  unsigned int i = 0;
  while (i < count) {
    // Process 64 bits at a time.
    if ((i & 0x7) == 0) {
      data.l64 = srcmem64[i];
      dstdata.l64 = dstmem64[i];
      uint64 src_tag = addr_to_tag(&srcmem64[i]);
      uint64 dst_tag = addr_to_tag(&dstmem64[i]);
      // Detect if tags have been corrupted.
      if (data.l64 != src_tag)
        ReportTagError(&srcmem64[i], data.l64, src_tag);
      if (dstdata.l64 != dst_tag)
        ReportTagError(&dstmem64[i], dstdata.l64, dst_tag);

      data.l32.l = pattern->pattern(i << 1);
      data.l32.h = pattern->pattern((i << 1) + 1);
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;

      data.l64 = dst_tag;
      dstmem64[i] = data.l64;
    } else {
      data.l64 = srcmem64[i];
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
      dstmem64[i] = data.l64;
    }
    i++;

    data.l64 = srcmem64[i];
    a2 = a2 + data.l32.l;
    b2 = b2 + a2;
    a2 = a2 + data.l32.h;
    b2 = b2 + a2;
    dstmem64[i] = data.l64;
    i++;
  }
  checksum->Set(a1, a2, b1, b2);
  return true;
}

// x86_64 SSE2 assembly implementation of Adler memory copy, with address
// tagging added as a second step. This is useful for debugging failures
// that only occur when SSE / nontemporal writes are used.
bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
                                       uint64 *srcmem64,
                                       unsigned int size_in_bytes,
                                       AdlerChecksum *checksum,
                                       struct page_entry *pe) {
  // Do ASM copy, ignore checksum.
  AdlerChecksum ignored_checksum;
  os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);

  // Force cache flush of both the source and destination addresses.
  //   length - length of block to flush in cachelines.
  //   mem_increment - number of dstmem/srcmem values per cacheline.
  int length = size_in_bytes / kCacheLineSize;
  int mem_increment = kCacheLineSize / sizeof(*dstmem64);
  OsLayer::FastFlushSync();
  for (int i = 0; i < length; ++i) {
    OsLayer::FastFlushHint(dstmem64 + (i * mem_increment));
    OsLayer::FastFlushHint(srcmem64 + (i * mem_increment));
  }
  OsLayer::FastFlushSync();

  // Check results.
  AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
  // Patch up address tags.
  TagAddrC(dstmem64, size_in_bytes);
  return true;
}

// Retag pages.
bool WorkerThread::TagAddrC(uint64 *memwords,
                            unsigned int size_in_bytes) {
  // Write an address tag into every 8th word -- one per 64-byte cache line
  // when wordsize_ is 8 bytes -- matching the layout FillPage() uses in
  // tag_mode_.
  int length = size_in_bytes / wordsize_;
  for (int i = 0; i < length; i += 8) {
    datacast_t data;
    data.l64 = addr_to_tag(&memwords[i]);
    memwords[i] = data.l64;
  }
  return true;
}

// C implementation of Adler memory crc.
bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
                                 unsigned int size_in_bytes,
                                 AdlerChecksum *checksum,
                                 struct page_entry *pe) {
  // Use this data wrapper to access memory with 64bit read/write.
  datacast_t data;
  unsigned int count = size_in_bytes / sizeof(data);

  if (count > ((1U) << 19)) {
    // Region is too large: count must be at most 512K (2^19) 64-bit words.
    return false;
  }

  uint64 a1 = 1;
  uint64 a2 = 1;
  uint64 b1 = 0;
  uint64 b2 = 0;

  class Pattern *pattern = pe->pattern;

  unsigned int i = 0;
  while (i < count) {
    // Process 64 bits at a time.
    if ((i & 0x7) == 0) {
      data.l64 = srcmem64[i];
      uint64 src_tag = addr_to_tag(&srcmem64[i]);
      // Check that tags match expected.
      if (data.l64 != src_tag)
        ReportTagError(&srcmem64[i], data.l64, src_tag);

      data.l32.l = pattern->pattern(i << 1);
      data.l32.h = pattern->pattern((i << 1) + 1);
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
    } else {
      data.l64 = srcmem64[i];
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
    }
    i++;

    data.l64 = srcmem64[i];
    a2 = a2 + data.l32.l;
    b2 = b2 + a2;
    a2 = a2 + data.l32.h;
    b2 = b2 + a2;
    i++;
  }
  checksum->Set(a1, a2, b1, b2);
  return true;
}

// Copy a block of memory quickly, while keeping a CRC of the data.
// Fall back to a full result check if the CRC mismatches.
int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
                              struct page_entry *srcpe) {
  int errors = 0;
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy
  uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
  uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
  // Remember the expected CRC
  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *targetmem = targetmembase + currentblock * blockwords;
    uint64 *sourcemem = sourcemembase + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
    } else {
      AdlerMemcpyC(targetmem, sourcemem, blocksize, &crc);
    }

    // Investigate miscompares.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcCopyPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcCopyPage CRC mismatch %s != %s, "
                  "but no miscompares found. Retrying with fresh data.\n",
                  crc.ToHexString().c_str(),
                  expectedcrc->ToHexString().c_str());
        if (!tag_mode_) {
          // Copy the data we wrote to the target back into the source again.
          // It should contain any corruption we read while first calculating
          // the CRC.
          memcpy(sourcemem, targetmem, blocksize);
          errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
          if (errorcount == 0) {
            int core_id = sched_getcpu();
            logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
                      "CRC mismatch %s != %s, "
                      "but no miscompares found on second pass.\n",
                      core_id, CurrentCpusFormat().c_str(),
                      crc.ToHexString().c_str(),
                      expectedcrc->ToHexString().c_str());
            struct ErrorRecord er;
            er.actual = sourcemem[0];
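            // 0xbad... in the high word is a sentinel "expected" value
            // marking an unreproducible CRC mismatch; it is not real
            // pattern data.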
            er.expected = 0xbad00000ull << 32;
            er.vaddr = sourcemem;
            ProcessError(&er, 0, "Hardware Error");
            errors += 1;
            errorcount_++;
          }
        }
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-sized transfers we should
  // never hit this.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *targetmem = targetmembase + blocks * blockwords;
    uint64 *sourcemem = sourcemembase + blocks * blockwords;

    errors += CheckRegion(sourcemem,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
    int leftoverwords = leftovers / wordsize_;
    for (int i = 0; i < leftoverwords; i++) {
      targetmem[i] = sourcemem[i];
    }
  }

  // Update pattern reference to reflect new contents.
  dstpe->pattern = srcpe->pattern;

  // Clean clean clean the errors away.
  if (errors) {
    // TODO(nsanders): Maybe we should patch rather than fill? Filling may
    // cause bad data to be propagated across the page.
    FillPage(dstpe);
  }
  return errors;
}



// Invert a block of memory quickly, traversing downwards.
int InvertThread::InvertPageDown(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base address for the memory invert.
  unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);

  for (int currentblock = blocks - 1; currentblock >= 0; currentblock--) {
    unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
    for (int i = blockwords - 32; i >= 0; i -= 32) {
      for (int index = i + 31; index >= i; --index) {
        unsigned int actual = sourcemem[index];
        sourcemem[index] = ~actual;
      }
      OsLayer::FastFlush(&sourcemem[i]);
    }
  }

  return 0;
}

// Invert a block of memory, traversing upwards.
int InvertThread::InvertPageUp(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base address for the memory invert.
  unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
    for (int i = 0; i < blockwords; i += 32) {
      for (int index = i; index <= i + 31; ++index) {
        unsigned int actual = sourcemem[index];
        sourcemem[index] = ~actual;
      }
      OsLayer::FastFlush(&sourcemem[i]);
    }
  }
  return 0;
}

// Copy a block of memory quickly, while keeping a CRC of the data.
// Fall back to a full result check if the CRC mismatches. Warm the CPU
// while running.
int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                  struct page_entry *srcpe) {
  int errors = 0;
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy
  uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
  uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
  // Remember the expected CRC
  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *targetmem = targetmembase + currentblock * blockwords;
    uint64 *sourcemem = sourcemembase + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe);
    } else {
      os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc);
    }

    // Investigate miscompares.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcWarmCopyPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != "
                  "actual: %s, but no miscompares found. "
                  "Retrying with fresh data.\n",
                  expectedcrc->ToHexString().c_str(),
                  crc.ToHexString().c_str());
        if (!tag_mode_) {
          // Copy the data we wrote to the target back into the source again.
          // It should contain any corruption we read while first calculating
          // the CRC.
          memcpy(sourcemem, targetmem, blocksize);
          errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
          if (errorcount == 0) {
            int core_id = sched_getcpu();
            logprintf(0, "Process Error: CPU %d(0x%s) CrcWarmCopyPage "
                      "CRC mismatch %s != %s, "
                      "but no miscompares found on second pass.\n",
                      core_id, CurrentCpusFormat().c_str(),
                      crc.ToHexString().c_str(),
                      expectedcrc->ToHexString().c_str());
            struct ErrorRecord er;
            er.actual = sourcemem[0];
            er.expected = 0xbad;
            er.vaddr = sourcemem;
            ProcessError(&er, 0, "Hardware Error");
            errors++;
            errorcount_++;
          }
        }
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-sized transfers we should
  // never hit this.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *targetmem = targetmembase + blocks * blockwords;
    uint64 *sourcemem = sourcemembase + blocks * blockwords;

    errors += CheckRegion(sourcemem,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
    int leftoverwords = leftovers / wordsize_;
    for (int i = 0; i < leftoverwords; i++) {
      targetmem[i] = sourcemem[i];
    }
  }

  // Update pattern reference to reflect new contents.
  dstpe->pattern = srcpe->pattern;

  // Clean clean clean the errors away.
  if (errors) {
    // TODO(nsanders): Maybe we should patch rather than fill? Filling may
    // cause bad data to be propagated across the page.
    FillPage(dstpe);
  }
  return errors;
}



// Memory check work loop. Execute until done, then exhaust pages.
bool CheckThread::Work() {
  struct page_entry pe;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting Check thread %d\n", thread_num_);

  // We want to check all the pages, and
  // stop when there aren't any left.
  while (true) {
    result = result && sat_->GetValid(&pe);
    if (!result) {
      if (IsReadyToRunNoPause())
        logprintf(0, "Process Error: check_thread failed to pop pages, "
                  "bailing\n");
      else
        result = true;
      break;
    }

    // Do the result check.
    CrcCheckPage(&pe);

    // Push pages back on the valid queue if we are still going,
    // throw them out otherwise.
    if (IsReadyToRunNoPause())
      result = result && sat_->PutValid(&pe);
    else
      result = result && sat_->PutEmpty(&pe);
    if (!result) {
      logprintf(0, "Process Error: check_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Memory copy work loop. Execute until marked done.
bool CopyThread::Work() {
  struct page_entry src;
  struct page_entry dst;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n",
            thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_);

  while (IsReadyToRun()) {
    // Pop the needed pages.
    result = result && sat_->GetValid(&src, tag_);
    result = result && sat_->GetEmpty(&dst, tag_);
    if (!result) {
      logprintf(0, "Process Error: copy_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    // Force errors for unittests.
    if (sat_->error_injection()) {
      if (loops == 8) {
        char *addr = reinterpret_cast<char*>(src.addr);
        int offset = random() % sat_->page_length();
        addr[offset] = 0xba;
      }
    }

    // We can use memcpy, or CRC check while we copy.
    if (sat_->warm()) {
      CrcWarmCopyPage(&dst, &src);
    } else if (sat_->strict()) {
      CrcCopyPage(&dst, &src);
    } else {
      memcpy(dst.addr, src.addr, sat_->page_length());
      dst.pattern = src.pattern;
    }

    result = result && sat_->PutValid(&dst);
    result = result && sat_->PutEmpty(&src);

    // Copy worker-threads yield themselves at the end of each copy loop,
    // to avoid preempting each other in the middle of the inner copy-loop.
    // Cooperation between copy worker-threads results in less unnecessary
    // cache thrashing (which happens when context-switching in the middle of
    // the inner copy-loop).
    YieldSelf();

    if (!result) {
      logprintf(0, "Process Error: copy_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
            thread_num_, status_, pages_copied_);
  return result;
}

// Memory invert work loop. Execute until marked done.
bool InvertThread::Work() {
  struct page_entry src;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting invert thread %d\n", thread_num_);

  while (IsReadyToRun()) {
    // Pop the needed pages.
    result = result && sat_->GetValid(&src);
    if (!result) {
      logprintf(0, "Process Error: invert_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    if (sat_->strict())
      CrcCheckPage(&src);

    // For the same reason CopyThread yields itself (see the YieldSelf comment
    // in CopyThread::Work()), InvertThread yields itself after each invert
    // operation to improve cooperation between different worker threads
    // stressing the memory/cache.
    InvertPageUp(&src);
    YieldSelf();
    InvertPageDown(&src);
    YieldSelf();
    InvertPageDown(&src);
    YieldSelf();
    InvertPageUp(&src);
    YieldSelf();

    if (sat_->strict())
      CrcCheckPage(&src);

    result = result && sat_->PutValid(&src);
    if (!result) {
      logprintf(0, "Process Error: invert_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops * 2;
  status_ = result;
  logprintf(9, "Log: Completed %d: Invert thread. Status %d, %d pages copied\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Set file name to use for File IO.
void FileThread::SetFile(const char *filename_init) {
  filename_ = filename_init;
  devicename_ = os_->FindFileDevice(filename_);
}

// Open the file for access.
bool FileThread::OpenFile(int *pfile) {
  int flags = O_RDWR | O_CREAT | O_SYNC;
  int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
  if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
    fd = open(filename_.c_str(), flags, 0644);  // Try without O_DIRECT.
    os_->ActivateFlushPageCache();  // Not using O_DIRECT fixed EINVAL.
  }
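  // When the filesystem rejects O_DIRECT (EINVAL), IO goes through the page
  // cache instead; ActivateFlushPageCache() makes the FlushPageCache() call
  // in WritePages() actually drop cached data, so later reads still exercise
  // the device rather than RAM.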
  if (fd < 0) {
    logprintf(0, "Process Error: Failed to create file %s!!\n",
              filename_.c_str());
    pages_copied_ = 0;
    return false;
  }
  *pfile = fd;
  return true;
}

// Close the file.
bool FileThread::CloseFile(int fd) {
  close(fd);
  return true;
}

// Write a tag into each sector of the page.
bool FileThread::SectorTagPage(struct page_entry *src, int block) {
  int page_length = sat_->page_length();
  struct FileThread::SectorTag *tag =
    (struct FileThread::SectorTag *)(src->addr);

  // Tag each sector.
  unsigned char magic = ((0xba + thread_num_) & 0xff);
  for (int sec = 0; sec < page_length / 512; sec++) {
    tag[sec].magic = magic;
    tag[sec].block = block & 0xff;
    tag[sec].sector = sec & 0xff;
    tag[sec].pass = pass_ & 0xff;
  }
  return true;
}
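
// Note: SectorTag is laid out so that tag[sec] indexes whole 512-byte sectors
// (SectorValidatePage() asserts sizeof(SectorTag) == 512); only the leading
// magic/block/sector/pass bytes of each sector are overwritten by the tags.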

bool FileThread::WritePageToFile(int fd, struct page_entry *src) {
  int page_length = sat_->page_length();
  // Fill the file with our data.
  int64 size = write(fd, src->addr, page_length);

  if (size != page_length) {
    os_->ErrorReport(devicename_.c_str(), "write-error", 1);
    errorcount_++;
    logprintf(0, "Block Error: file_thread failed to write, "
              "bailing\n");
    return false;
  }
  return true;
}

// Write the data to the file.
bool FileThread::WritePages(int fd) {
  int strict = sat_->strict();

  // Start fresh at beginning of file for each batch of pages.
  lseek64(fd, 0, SEEK_SET);
  for (int i = 0; i < sat_->disk_pages(); i++) {
    struct page_entry src;
    if (!GetValidPage(&src))
      return false;
    // Save expected pattern.
    page_recs_[i].pattern = src.pattern;
    page_recs_[i].src = src.addr;

    // Check data correctness.
    if (strict)
      CrcCheckPage(&src);

    SectorTagPage(&src, i);

    bool result = WritePageToFile(fd, &src);

    if (!PutEmptyPage(&src))
      return false;

    if (!result)
      return false;
  }
  return os_->FlushPageCache();  // If O_DIRECT worked, this will be a NOP.
}

// Copy data from file into memory block.
bool FileThread::ReadPageFromFile(int fd, struct page_entry *dst) {
  int page_length = sat_->page_length();

  // Do the actual read.
  int64 size = read(fd, dst->addr, page_length);
  if (size != page_length) {
    os_->ErrorReport(devicename_.c_str(), "read-error", 1);
    logprintf(0, "Block Error: file_thread failed to read, "
              "bailing\n");
    errorcount_++;
    return false;
  }
  return true;
}

// Check sector tagging.
bool FileThread::SectorValidatePage(const struct PageRec &page,
                                    struct page_entry *dst, int block) {
  // Error injection.
  static int calls = 0;
  calls++;

  // Do sector tag compare.
  int firstsector = -1;
  int lastsector = -1;
  bool badsector = false;
  int page_length = sat_->page_length();

  // Cast data block into an array of tagged sectors.
  struct FileThread::SectorTag *tag =
    (struct FileThread::SectorTag *)(dst->addr);

  sat_assert(sizeof(*tag) == 512);

  // Error injection.
  if (sat_->error_injection()) {
    if (calls == 2) {
      for (int badsec = 8; badsec < 17; badsec++)
        tag[badsec].pass = 27;
    }
    if (calls == 18) {
      (static_cast<int32*>(dst->addr))[27] = 0xbadda7a;
    }
  }

  // Check each sector for the correct tag we added earlier,
  // then revert the tag to the normal data pattern.
  unsigned char magic = ((0xba + thread_num_) & 0xff);
  for (int sec = 0; sec < page_length / 512; sec++) {
    // Check magic tag.
    if ((tag[sec].magic != magic) ||
        (tag[sec].block != (block & 0xff)) ||
        (tag[sec].sector != (sec & 0xff)) ||
        (tag[sec].pass != (pass_ & 0xff))) {
      // Offset calculation for tag location.
      int offset = sec * sizeof(SectorTag);
      if (tag[sec].block != (block & 0xff))
        offset += 1 * sizeof(uint8);
      else if (tag[sec].sector != (sec & 0xff))
        offset += 2 * sizeof(uint8);
      else if (tag[sec].pass != (pass_ & 0xff))
        offset += 3 * sizeof(uint8);

      // Run sector tag error through diagnoser for logging and reporting.
      errorcount_ += 1;
      os_->error_diagnoser_->AddHDDSectorTagError(devicename_, tag[sec].block,
                                                  offset,
                                                  tag[sec].sector,
                                                  page.src, page.dst);

      logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. "
                "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n",
                block * page_length + 512 * sec,
                (pass_ & 0xff), (unsigned int)tag[sec].pass,
                sec, (unsigned int)tag[sec].sector,
                block, (unsigned int)tag[sec].block,
                magic, (unsigned int)tag[sec].magic,
                filename_.c_str());

      // Keep track of first and last bad sector.
      if (firstsector == -1)
        firstsector = (block * page_length / 512) + sec;
      lastsector = (block * page_length / 512) + sec;
      badsector = true;
    }
    // Patch the tag back to the proper pattern: the tag occupies only the
    // first 32-bit word of each sector, so one pattern write restores it.
    unsigned int *addr = (unsigned int *)(&tag[sec]);
    *addr = dst->pattern->pattern(512 * sec / sizeof(*addr));
  }

  // If we found sector errors:
  if (badsector == true) {
    logprintf(5, "Log: file sector miscompare at offset %x-%x. File: %s\n",
              firstsector * 512,
              ((lastsector + 1) * 512) - 1,
              filename_.c_str());

    // Either exit immediately, or patch the data up and continue.
    if (sat_->stop_on_error()) {
      exit(1);
    } else {
      // Patch up bad pages.
      for (int block = (firstsector * 512) / page_length;
           block <= (lastsector * 512) / page_length;
           block++) {
        unsigned int *memblock = static_cast<unsigned int *>(dst->addr);
        int length = page_length / wordsize_;
        for (int i = 0; i < length; i++) {
          memblock[i] = dst->pattern->pattern(i);
        }
      }
    }
  }
  return true;
}

// Allocate a bounce buffer for data transfers, if one is needed.
bool FileThread::PagePrepare() {
  // We can only do direct IO to SAT pages if it is normal mem.
  page_io_ = os_->normal_mem();

  // Init a local buffer if we need it.
  if (!page_io_) {
#ifdef HAVE_POSIX_MEMALIGN
    int result = posix_memalign(&local_page_, 512, sat_->page_length());
#else
    local_page_ = memalign(512, sat_->page_length());
    int result = (local_page_ == 0);
#endif
    if (result) {
      logprintf(0, "Process Error: disk thread posix_memalign "
                "returned %d (fail)\n",
                result);
      status_ = false;
      return false;
    }
  }
  return true;
}
1825
1826
1827 // Remove memory allocated for data transfer.
1828 bool FileThread::PageTeardown() {
1829 // Free a local buffer if we need to.
1830 if (!page_io_) {
1831 free(local_page_);
1832 }
1833 return true;
1834 }
1835
1836
1837
1838 // Get memory for an incoming data transfer.
1839 bool FileThread::GetEmptyPage(struct page_entry *dst) {
1840 if (page_io_) {
1841 if (!sat_->GetEmpty(dst))
1842 return false;
1843 } else {
1844 dst->addr = local_page_;
1845 dst->offset = 0;
1846 dst->pattern = 0;
1847 }
1848 return true;
1849 }
1850
1851 // Get memory for an outgoing data transfer.
1852 bool FileThread::GetValidPage(struct page_entry *src) {
1853 struct page_entry tmp;
1854 if (!sat_->GetValid(&tmp))
1855 return false;
1856 if (page_io_) {
1857 *src = tmp;
1858 return true;
1859 } else {
1860 src->addr = local_page_;
1861 src->offset = 0;
1862 CrcCopyPage(src, &tmp);
1863 if (!sat_->PutValid(&tmp))
1864 return false;
1865 }
1866 return true;
1867 }
1868
1869
1870 // Throw out a used empty page.
1871 bool FileThread::PutEmptyPage(struct page_entry *src) {
1872 if (page_io_) {
1873 if (!sat_->PutEmpty(src))
1874 return false;
1875 }
1876 return true;
1877 }
1878
1879 // Throw out a used, filled page.
1880 bool FileThread::PutValidPage(struct page_entry *src) {
1881 if (page_io_) {
1882 if (!sat_->PutValid(src))
1883 return false;
1884 }
1885 return true;
1886 }
1887
1888 // Copy data from file into memory blocks.
1889 bool FileThread::ReadPages(int fd) {
1890 int page_length = sat_->page_length();
1891 int strict = sat_->strict();
1892 bool result = true;
1893
1894 // Read our data back out of the file, into its new location.
1895 lseek64(fd, 0, SEEK_SET);
1896 for (int i = 0; i < sat_->disk_pages(); i++) {
1897 struct page_entry dst;
1898 if (!GetEmptyPage(&dst))
1899 return false;
1900 // Retrieve expected pattern.
1901 dst.pattern = page_recs_[i].pattern;
1902 // Update page record.
1903 page_recs_[i].dst = dst.addr;
1904
1905 // Read from the file into destination page.
1906 if (!ReadPageFromFile(fd, &dst)) {
1907 PutEmptyPage(&dst);
1908 return false;
1909 }
1910
1911 SectorValidatePage(page_recs_[i], &dst, i);
1912
1913 // Ensure that the transfer ended up with correct data.
1914 if (strict) {
1915 // Record page index currently CRC checked.
1916 crc_page_ = i;
1917 int errors = CrcCheckPage(&dst);
1918 if (errors) {
1919 logprintf(5, "Log: file miscompare at block %d, "
1920 "offset %x-%x. File: %s\n",
1921 i, i * page_length, ((i + 1) * page_length) - 1,
1922 filename_.c_str());
1923 result = false;
1924 }
1925 crc_page_ = -1;
1926 errorcount_ += errors;
1927 }
1928 if (!PutValidPage(&dst))
1929 return false;
1930 }
1931 return result;
1932 }
1933
1934 // File IO work loop. Execute until marked done.
1935 bool FileThread::Work() {
1936 bool result = true;
1937 int64 loops = 0;
1938
1939 logprintf(9, "Log: Starting file thread %d, file %s, device %s\n",
1940 thread_num_,
1941 filename_.c_str(),
1942 devicename_.c_str());
1943
1944 if (!PagePrepare()) {
1945 status_ = false;
1946 return false;
1947 }
1948
1949 // Open the data IO file.
1950 int fd = 0;
1951 if (!OpenFile(&fd)) {
1952 status_ = false;
1953 return false;
1954 }
1955
1956 pass_ = 0;
1957
1958 // Load patterns into page records.
1959 page_recs_ = new struct PageRec[sat_->disk_pages()];
1960 for (int i = 0; i < sat_->disk_pages(); i++) {
1961 page_recs_[i].pattern = new class Pattern();
1962 }
1963
1964 // Loop until done.
1965 while (IsReadyToRun()) {
1966 // Do the file write.
1967 if (!(result = result && WritePages(fd)))
1968 break;
1969
1970 // Do the file read.
1971 if (!(result = result && ReadPages(fd)))
1972 break;
1973
1974 loops++;
1975 pass_ = loops;
1976 }
1977
1978 pages_copied_ = loops * sat_->disk_pages();
1979
1980 // Clean up.
1981 CloseFile(fd);
1982 PageTeardown();
1983
1984 logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n",
1985 thread_num_, status_, pages_copied_);
1986 // Failure to read from device indicates hardware,
1987 // rather than procedural SW error.
1988 status_ = true;
1989 return true;
1990 }
1991
1992 bool NetworkThread::IsNetworkStopSet() {
1993 return !IsReadyToRunNoPause();
1994 }
1995
1996 bool NetworkSlaveThread::IsNetworkStopSet() {
1997 // This thread has no completion status.
1998 // It finishes whenever there is no more data to be
1999 // passed back.
2000 return true;
2001 }
2002
2003 // Set ip name to use for Network IO.
2004 void NetworkThread::SetIP(const char *ipaddr_init) {
2005 strncpy(ipaddr_, ipaddr_init, 256);
     // strncpy() leaves the buffer unterminated when the source fills it;
     // terminate explicitly (ipaddr_ holds at least 256 bytes, per the bound).
     ipaddr_[255] = '\0';
2006 }
2007
2008 // Create a socket.
2009 // Return false on error.
2010 bool NetworkThread::CreateSocket(int *psocket) {
2011 int sock = socket(AF_INET, SOCK_STREAM, 0);
2012 if (sock == -1) {
2013 logprintf(0, "Process Error: Cannot open socket\n");
2014 pages_copied_ = 0;
2015 status_ = false;
2016 return false;
2017 }
2018 *psocket = sock;
2019 return true;
2020 }
2021
2022 // Close the socket.
2023 bool NetworkThread::CloseSocket(int sock) {
2024 close(sock);
2025 return true;
2026 }
2027
2028 // Initiate the tcp connection.
2029 bool NetworkThread::Connect(int sock) {
2030 struct sockaddr_in dest_addr;
2031 dest_addr.sin_family = AF_INET;
2032 dest_addr.sin_port = htons(kNetworkPort);
2033 memset(&(dest_addr.sin_zero), '\0', sizeof(dest_addr.sin_zero));
2034
2035 // Translate dot notation to u32.
2036 if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) {
2037 logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_);
2038 pages_copied_ = 0;
2039 status_ = false;
2040 return false;
2041 }
2042
2043 if (-1 == connect(sock, reinterpret_cast<struct sockaddr *>(&dest_addr),
2044 sizeof(struct sockaddr))) {
2045 logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_);
2046 pages_copied_ = 0;
2047 status_ = false;
2048 return false;
2049 }
2050 return true;
2051 }
2052
2053 // Bind the socket and listen for incoming connections.
2054 bool NetworkListenThread::Listen() {
2055 struct sockaddr_in sa;
2056
2057 memset(&(sa.sin_zero), '\0', sizeof(sa.sin_zero));
2058
2059 sa.sin_family = AF_INET;
2060 sa.sin_addr.s_addr = INADDR_ANY;
2061 sa.sin_port = htons(kNetworkPort);
2062
2063 if (-1 == ::bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) {
2064 char buf[256];
2065 sat_strerror(errno, buf, sizeof(buf));
2066 logprintf(0, "Process Error: Cannot bind socket: %s\n", buf);
2067 pages_copied_ = 0;
2068 status_ = false;
2069 return false;
2070 }
2071 listen(sock_, 3);
2072 return true;
2073 }
2074
2075 // Wait for a connection from a network traffic generation thread.
2076 bool NetworkListenThread::Wait() {
2077 fd_set rfds;
2078 struct timeval tv;
2079 int retval;
2080
2081 // Watch sock_ to see when it has input.
2082 FD_ZERO(&rfds);
2083 FD_SET(sock_, &rfds);
2084 // Wait up to five seconds.
2085 tv.tv_sec = 5;
2086 tv.tv_usec = 0;
2087
2088 retval = select(sock_ + 1, &rfds, NULL, NULL, &tv);
2089
2090 return (retval > 0);
2091 }
2092
2093 // Accept a connection from a network traffic generation thread.
2094 bool NetworkListenThread::GetConnection(int *pnewsock) {
2095 struct sockaddr_in sa;
2096 socklen_t size = sizeof(struct sockaddr_in);
2097
2098 int newsock = accept(sock_, reinterpret_cast<struct sockaddr *>(&sa), &size);
2099 if (newsock < 0) {
2100 logprintf(0, "Process Error: Did not receive connection\n");
2101 pages_copied_ = 0;
2102 status_ = false;
2103 return false;
2104 }
2105 *pnewsock = newsock;
2106 return true;
2107 }
2108
2109 // Send a page, return false if a page was not sent.
2110 bool NetworkThread::SendPage(int sock, struct page_entry *src) {
2111 int page_length = sat_->page_length();
2112 char *address = static_cast<char*>(src->addr);
2113
2114 // Send our data over the network.
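  // send() may transfer fewer bytes than requested, so loop until the
  // entire page is out; a return of 0 or -1 means the peer went away or
  // a socket error occurred.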
2115 int size = page_length;
2116 while (size) {
2117 int transferred = send(sock, address + (page_length - size), size, 0);
2118 if ((transferred == 0) || (transferred == -1)) {
2119 if (!IsNetworkStopSet()) {
2120 char buf[256] = "";
2121 sat_strerror(errno, buf, sizeof(buf));
2122 logprintf(0, "Process Error: Thread %d, "
2123 "Network write failed, bailing. (%s)\n",
2124 thread_num_, buf);
2125 status_ = false;
2126 }
2127 return false;
2128 }
2129 size = size - transferred;
2130 }
2131 return true;
2132 }
2133
2134 // Receive a page. Return false if a page was not received.
2135 bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
2136 int page_length = sat_->page_length();
2137 char *address = static_cast<char*>(dst->addr);
2138
2139 // Maybe we will get our data back again, maybe not.
2140 int size = page_length;
2141 while (size) {
2142 int transferred = recv(sock, address + (page_length - size), size, 0);
2143 if ((transferred == 0) || (transferred == -1)) {
2144 // Typically network slave thread should exit as network master
2145 // thread stops sending data.
2146 if (IsNetworkStopSet()) {
2147 int err = errno;
2148 if (transferred == 0 && err == 0) {
2149 // Two system setups will not sync exactly,
2150 // allow early exit, but log it.
2151 logprintf(0, "Log: Net thread did not receive any data, exiting.\n");
2152 } else {
2153 char buf[256] = "";
2154 sat_strerror(err, buf, sizeof(buf));
2155 // Print why we failed.
2156 logprintf(0, "Process Error: Thread %d, "
2157 "Network read failed, bailing (%s).\n",
2158 thread_num_, buf);
2159 status_ = false;
2160 // Print arguments and results.
2161 logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n",
2162 sock, address + (page_length - size),
2163 size, transferred, err);
2164 if ((transferred == 0) &&
2165 (page_length - size < 512) &&
2166 (page_length - size > 0)) {
2167 // Print null terminated data received, to see who's been
2168 // sending us suspicious unwanted data.
2169 address[page_length - size] = 0;
2170 logprintf(0, "Log: received %d bytes: '%s'\n",
2171 page_length - size, address);
2172 }
2173 }
2174 }
2175 return false;
2176 }
2177 size = size - transferred;
2178 }
2179 return true;
2180 }
2181
2182 // Network IO work loop. Execute until marked done.
2183 // Return true if the thread ran as expected.
2184 bool NetworkThread::Work() {
2185 logprintf(9, "Log: Starting network thread %d, ip %s\n",
2186 thread_num_,
2187 ipaddr_);
2188
2189 // Make a socket.
2190 int sock = 0;
2191 if (!CreateSocket(&sock))
2192 return false;
2193
2194 // The network IO loop requires the network slave thread to have already
2195 // initialized. We will sleep here for a while to ensure that the slave
2196 // thread will be listening by the time we connect.
2197 // Sleep for 15 seconds.
2198 sat_sleep(15);
2199 logprintf(9, "Log: Starting execution of network thread %d, ip %s\n",
2200 thread_num_,
2201 ipaddr_);
2202
2203
2204 // Connect to a slave thread.
2205 if (!Connect(sock))
2206 return false;
2207
2208 // Loop until done.
2209 bool result = true;
2210 int strict = sat_->strict();
2211 int64 loops = 0;
2212 while (IsReadyToRun()) {
2213 struct page_entry src;
2214 struct page_entry dst;
2215 result = result && sat_->GetValid(&src);
2216 result = result && sat_->GetEmpty(&dst);
2217 if (!result) {
2218 logprintf(0, "Process Error: net_thread failed to pop pages, "
2219 "bailing\n");
2220 break;
2221 }
2222
2223 // Check data correctness.
2224 if (strict)
2225 CrcCheckPage(&src);
2226
2227 // Do the network write.
2228 if (!(result = result && SendPage(sock, &src)))
2229 break;
2230
2231 // Update pattern reference to reflect new contents.
2232 dst.pattern = src.pattern;
2233
2234 // Do the network read.
2235 if (!(result = result && ReceivePage(sock, &dst)))
2236 break;
2237
2238 // Ensure that the transfer ended up with correct data.
2239 if (strict)
2240 CrcCheckPage(&dst);
2241
2242 // Return all of our pages to the queue.
2243 result = result && sat_->PutValid(&dst);
2244 result = result && sat_->PutEmpty(&src);
2245 if (!result) {
2246 logprintf(0, "Process Error: net_thread failed to push pages, "
2247 "bailing\n");
2248 break;
2249 }
2250 loops++;
2251 }
2252
2253 pages_copied_ = loops;
2254 status_ = result;
2255
2256 // Clean up.
2257 CloseSocket(sock);
2258
2259 logprintf(9, "Log: Completed %d: network thread status %d, "
2260 "%d pages copied\n",
2261 thread_num_, status_, pages_copied_);
2262 return result;
2263 }
2264
2265 // Spawn slave threads for incoming connections.
2266 bool NetworkListenThread::SpawnSlave(int newsock, int threadid) {
2267 logprintf(12, "Log: Listen thread spawning slave\n");
2268
2269 // Spawn slave thread, to reflect network traffic back to sender.
2270 ChildWorker *child_worker = new ChildWorker;
2271 child_worker->thread.SetSock(newsock);
2272 child_worker->thread.InitThread(threadid, sat_, os_, patternlist_,
2273 &child_worker->status);
2274 child_worker->status.Initialize();
2275 child_worker->thread.SpawnThread();
2276 child_workers_.push_back(child_worker);
2277
2278 return true;
2279 }
2280
2281 // Reap slave threads.
2282 bool NetworkListenThread::ReapSlaves() {
2283 bool result = true;
2284 // Gather status and reap threads.
2285 logprintf(12, "Log: Joining all outstanding threads\n");
2286
2287 for (size_t i = 0; i < child_workers_.size(); i++) {
2288 NetworkSlaveThread& child_thread = child_workers_[i]->thread;
2289 logprintf(12, "Log: Joining slave thread %d\n", i);
2290 child_thread.JoinThread();
2291 if (child_thread.GetStatus() != 1) {
2292 logprintf(0, "Process Error: Slave Thread %d failed with status %d\n", i,
2293 child_thread.GetStatus());
2294 result = false;
2295 }
2296 errorcount_ += child_thread.GetErrorCount();
2297 logprintf(9, "Log: Slave Thread %d found %lld miscompares\n", i,
2298 child_thread.GetErrorCount());
2299 pages_copied_ += child_thread.GetPageCount();
2300 }
2301
2302 return result;
2303 }
2304
2305 // Network listener IO work loop. Execute until marked done.
2306 // Return false on fatal software error.
2307 bool NetworkListenThread::Work() {
2308 logprintf(9, "Log: Starting network listen thread %d\n",
2309 thread_num_);
2310
2311 // Make a socket.
2312 sock_ = 0;
2313 if (!CreateSocket(&sock_)) {
2314 status_ = false;
2315 return false;
2316 }
2317 logprintf(9, "Log: Listen thread created sock\n");
2318
2319 // Allows incoming connections to be queued up by socket library.
2320 int newsock = 0;
2321 Listen();
2322 logprintf(12, "Log: Listen thread waiting for incoming connections\n");
2323
2324 // Wait on incoming connections, and spawn worker threads for them.
2325 int threadcount = 0;
2326 while (IsReadyToRun()) {
2327 // Poll for connections that we can accept().
2328 if (Wait()) {
2329 // Accept those connections.
2330 logprintf(12, "Log: Listen thread found incoming connection\n");
2331 if (GetConnection(&newsock)) {
2332 SpawnSlave(newsock, threadcount);
2333 threadcount++;
2334 }
2335 }
2336 }
2337
2338 // Gather status and join spawned threads.
2339 ReapSlaves();
2340
2341 // Delete the child workers.
2342 for (ChildVector::iterator it = child_workers_.begin();
2343 it != child_workers_.end(); ++it) {
2344 (*it)->status.Destroy();
2345 delete *it;
2346 }
2347 child_workers_.clear();
2348
2349 CloseSocket(sock_);
2350
2351 status_ = true;
2352 logprintf(9,
2353 "Log: Completed %d: network listen thread status %d, "
2354 "%d pages copied\n",
2355 thread_num_, status_, pages_copied_);
2356 return true;
2357 }
2358
2359 // Set network reflector socket struct.
2360 void NetworkSlaveThread::SetSock(int sock) {
2361 sock_ = sock;
2362 }
2363
2364 // Network reflector IO work loop. Execute until marked done.
2365 // Return false on fatal software error.
2366 bool NetworkSlaveThread::Work() {
2367 logprintf(9, "Log: Starting network slave thread %d\n",
2368 thread_num_);
2369
2370 // Verify that we have a socket.
2371 int sock = sock_;
2372 if (!sock) {
2373 status_ = false;
2374 return false;
2375 }
2376
2377 // Loop until done.
2378 int64 loops = 0;
2379 // Init a local buffer for storing data.
2380 void *local_page = NULL;
2381 #ifdef HAVE_POSIX_MEMALIGN
2382 int result = posix_memalign(&local_page, 512, sat_->page_length());
2383 #else
2384 local_page = memalign(512, sat_->page_length());
2385 int result = (local_page == 0);
2386 #endif
2387 if (result) {
2388 logprintf(0, "Process Error: net slave posix_memalign "
2389 "returned %d (fail)\n",
2390 result);
2391 status_ = false;
2392 return false;
2393 }
2394
2395 struct page_entry page;
2396 page.addr = local_page;
2397
2398 // This thread will continue to run as long as the thread on the other end of
2399 // the socket is still sending and receiving data.
2400 while (1) {
2401 // Do the network read.
2402 if (!ReceivePage(sock, &page))
2403 break;
2404
2405 // Do the network write.
2406 if (!SendPage(sock, &page))
2407 break;
2408
2409 loops++;
2410 }
2411
2412 pages_copied_ = loops;
2413 // No results provided from this type of thread.
2414 status_ = true;
2415
2416 // Clean up.
2417 CloseSocket(sock);
2418
2419 logprintf(9,
2420 "Log: Completed %d: network slave thread status %d, "
2421 "%d pages copied\n",
2422 thread_num_, status_, pages_copied_);
2423 return true;
2424 }
2425
2426 // Thread work loop. Execute until marked finished.
2427 bool ErrorPollThread::Work() {
2428 logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_);
2429
2430 // This calls a generic error polling function in the Os abstraction layer.
2431 do {
2432 errorcount_ += os_->ErrorPoll();
2433 os_->ErrorWait();
2434 } while (IsReadyToRun());
2435
2436 logprintf(9, "Log: Finished system error poll thread %d: %d errors\n",
2437 thread_num_, errorcount_);
2438 status_ = true;
2439 return true;
2440 }
2441
2442 // Worker thread to heat up CPU.
2443 // This thread does not evaluate pass/fail or software error.
2444 bool CpuStressThread::Work() {
2445 logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_);
2446
2447 do {
2448 // Run ludloff's platform/CPU-specific assembly workload.
2449 os_->CpuStressWorkload();
2450 YieldSelf();
2451 } while (IsReadyToRun());
2452
2453 logprintf(9, "Log: Finished CPU stress thread %d:\n",
2454 thread_num_);
2455 status_ = true;
2456 return true;
2457 }
2458
2459 CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
2460 int cacheline_count,
2461 int thread_num,
2462 int thread_count,
2463 int inc_count) {
2464 cc_cacheline_data_ = data;
2465 cc_cacheline_count_ = cacheline_count;
2466 cc_thread_num_ = thread_num;
2467 cc_thread_count_ = thread_count;
2468 cc_inc_count_ = inc_count;
2469 }
2470
2471 // A very simple pseudorandom generator. Since the random number is based
2472 // on only a few simple logic operations, it can be done quickly in registers
2473 // and the compiler can inline it.
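// Concretely, this is a right-shifting Galois LFSR: after the shift, the
// expression -(seed & 1) is all-ones when the low bit was set and zero
// otherwise, so the feedback taps in kRandomPolynomial are XORed in only
// on those steps.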
2474 uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) {
2475 return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial);
2476 }
2477
2478 // Worker thread to test the cache coherency of the CPUs.
2479 // Return false on fatal sw error.
2480 bool CpuCacheCoherencyThread::Work() {
2481 logprintf(9, "Log: Starting the Cache Coherency thread %d\n",
2482 cc_thread_num_);
2483 uint64 time_start, time_end;
2484 struct timeval tv;
2485
2486 // Use a slightly more robust random number for the initial
2487 // value, so the random sequences from the simple generator will
2488 // be more divergent.
2489 #ifdef HAVE_RAND_R
2490 unsigned int seed = static_cast<unsigned int>(gettid());
2491 uint64 r = static_cast<uint64>(rand_r(&seed));
2492 r |= static_cast<uint64>(rand_r(&seed)) << 32;
2493 #else
2494 srand(time(NULL));
2495 uint64 r = static_cast<uint64>(rand()); // NOLINT
2496 r |= static_cast<uint64>(rand()) << 32; // NOLINT
2497 #endif
2498
2499 gettimeofday(&tv, NULL); // Get the timestamp before increments.
2500 time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
2501
2502 uint64 total_inc = 0; // Total increments done by the thread.
2503 while (IsReadyToRun()) {
2504 for (int i = 0; i < cc_inc_count_; i++) {
2505 // Choose a data structure at random and increment the appropriate
2506 // member according to the offset (which is the same as the
2507 // thread number).
2508 r = SimpleRandom(r);
2509 int cline_num = r % cc_cacheline_count_;
2510 int offset;
2511 // Reverse the order for odd numbered threads in odd numbered cache
2512 // lines. This is designed for massively multi-core systems where the
2513 // number of cores exceeds the bytes in a cache line, so "distant" cores
2514 // get a chance to exercise cache coherency between them.
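      // Example (assuming 64 threads): on an odd cache line, odd thread 63
      // uses offset (64 & ~1) - 63 = 1 while odd thread 1 uses 63, so the
      // highest- and lowest-numbered threads land on neighboring bytes.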
2515 if (cline_num & cc_thread_num_ & 1)
2516 offset = (cc_thread_count_ & ~1) - cc_thread_num_;
2517 else
2518 offset = cc_thread_num_;
2519 // Increment the member of the randomly selected structure.
2520 (cc_cacheline_data_[cline_num].num[offset])++;
2521 }
2522
2523 total_inc += cc_inc_count_;
2524
2525 // Calculate if the local counter matches with the global value
2526 // in all the cache line structures for this particular thread.
2527 int cc_global_num = 0;
2528 for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
2529 int offset;
2530 // Perform the same offset calculation from above.
2531 if (cline_num & cc_thread_num_ & 1)
2532 offset = (cc_thread_count_ & ~1) - cc_thread_num_;
2533 else
2534 offset = cc_thread_num_;
2535 cc_global_num += cc_cacheline_data_[cline_num].num[offset];
2536 // Reset the cacheline member's value for the next run.
2537 cc_cacheline_data_[cline_num].num[offset] = 0;
2538 }
2539 if (sat_->error_injection())
2540 cc_global_num = -1;
2541
2542 // Since the count is only stored in a byte, to squeeze more into a
2543 // single cache line, only compare it as a byte. In the event that there
2544 // is something detected, the chance that it would be missed by a single
2545 // thread is 1 in 256. If it affects all cores, that makes the chance
2546 // of it being missed terribly minute. It seems unlikely any failure
2547 // case would be off by more than a small number.
2548 if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) {
2549 errorcount_++;
2550 logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
2551 cc_global_num, cc_inc_count_);
2552 }
2553 }
2554 gettimeofday(&tv, NULL); // Get the timestamp at the end.
2555 time_end = tv.tv_sec * 1000000ULL + tv.tv_usec;
2556
2557 uint64 us_elapsed = time_end - time_start;
2558 // inc_rate is the no. of increments per second.
2559 double inc_rate = total_inc * 1e6 / us_elapsed;
2560
2561 logprintf(4, "Stats: CC Thread(%d): Time=%llu us,"
2562 " Increments=%llu, Increments/sec = %.6lf\n",
2563 cc_thread_num_, us_elapsed, total_inc, inc_rate);
2564 logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n",
2565 cc_thread_num_);
2566 status_ = true;
2567 return true;
2568 }
2569
2570 DiskThread::DiskThread(DiskBlockTable *block_table) {
2571 read_block_size_ = kSectorSize; // default 1 sector (512 bytes)
2572 write_block_size_ = kSectorSize; // this assumes read and write block size
2573 // are the same
2574 segment_size_ = -1; // use the entire disk as one segment
2575 cache_size_ = 16 * 1024 * 1024; // assume 16MiB cache by default
2576 // Use a queue such that 3/2 times as much data as the cache can hold
2577 // is written before it is read so that there is little chance the read
2578 // data is in the cache.
2579 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
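  // Worked example with the defaults above: 16MiB / 512 bytes = 32768
  // blocks, and 3/2 of that gives queue_size_ = 49152 blocks (24MiB of
  // writes issued before the first read).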
2580 blocks_per_segment_ = 32;
2581
2582 read_threshold_ = 100000; // 100ms is a reasonable limit for
2583 write_threshold_ = 100000; // reading/writing a sector
2584
2585 read_timeout_ = 5000000; // 5 seconds should be long enough for a
2586 write_timeout_ = 5000000; // timeout when reading/writing
2587
2588 device_sectors_ = 0;
2589 non_destructive_ = 0;
2590
2591 #ifdef HAVE_LIBAIO_H
2592 aio_ctx_ = 0;
2593 #endif
2594 block_table_ = block_table;
2595 update_block_table_ = 1;
2596
2597 block_buffer_ = NULL;
2598
2599 blocks_written_ = 0;
2600 blocks_read_ = 0;
2601 }
2602
2603 DiskThread::~DiskThread() {
2604 if (block_buffer_)
2605 free(block_buffer_);
2606 }
2607
2608 // Set filename for device file (in /dev).
2609 void DiskThread::SetDevice(const char *device_name) {
2610 device_name_ = device_name;
2611 }
2612
2613 // Set various parameters that control the behaviour of the test.
2614 // -1 is used as a sentinel value on each parameter (except non_destructive)
2615 // to indicate that the parameter should not be set.
2616 bool DiskThread::SetParameters(int read_block_size,
2617 int write_block_size,
2618 int64 segment_size,
2619 int64 cache_size,
2620 int blocks_per_segment,
2621 int64 read_threshold,
2622 int64 write_threshold,
2623 int non_destructive) {
2624 if (read_block_size != -1) {
2625 // Blocks must be aligned to the disk's sector size.
2626 if (read_block_size % kSectorSize != 0) {
2627 logprintf(0, "Process Error: Block size must be a multiple of %d "
2628 "(thread %d).\n", kSectorSize, thread_num_);
2629 return false;
2630 }
2631
2632 read_block_size_ = read_block_size;
2633 }
2634
2635 if (write_block_size != -1) {
2636 // Write blocks must be aligned to the disk's sector size and to the
2637 // block size.
2638 if (write_block_size % kSectorSize != 0) {
2639 logprintf(0, "Process Error: Write block size must be a multiple "
2640 "of %d (thread %d).\n", kSectorSize, thread_num_);
2641 return false;
2642 }
2643 if (write_block_size % read_block_size_ != 0) {
2644 logprintf(0, "Process Error: Write block size must be a multiple "
2645 "of the read block size, which is %d (thread %d).\n",
2646 read_block_size_, thread_num_);
2647 return false;
2648 }
2649
2650 write_block_size_ = write_block_size;
2651
2652 } else {
2653 // Make sure write_block_size_ is still valid.
2654 if (read_block_size_ > write_block_size_) {
2655 logprintf(5, "Log: Assuming write block size equal to read block size, "
2656 "which is %d (thread %d).\n", read_block_size_,
2657 thread_num_);
2658 write_block_size_ = read_block_size_;
2659 } else {
2660 if (write_block_size_ % read_block_size_ != 0) {
2661 logprintf(0, "Process Error: Write block size (defined as %d) must "
2662 "be a multiple of the read block size, which is %d "
2663 "(thread %d).\n", write_block_size_, read_block_size_,
2664 thread_num_);
2665 return false;
2666 }
2667 }
2668 }
2669
2670 if (cache_size != -1) {
2671 cache_size_ = cache_size;
2672 }
2673
2674 if (blocks_per_segment != -1) {
2675 if (blocks_per_segment <= 0) {
2676 logprintf(0, "Process Error: Blocks per segment must be greater than "
2677 "zero.\n (thread %d)", thread_num_);
2678 return false;
2679 }
2680
2681 blocks_per_segment_ = blocks_per_segment;
2682 }
2683
2684 if (read_threshold != -1) {
2685 if (read_threshold <= 0) {
2686 logprintf(0, "Process Error: Read threshold must be greater than "
2687 "zero (thread %d).\n", thread_num_);
2688 return false;
2689 }
2690
2691 read_threshold_ = read_threshold;
2692 }
2693
2694 if (write_threshold != -1) {
2695 if (write_threshold <= 0) {
2696 logprintf(0, "Process Error: Write threshold must be greater than "
2697 "zero (thread %d).\n", thread_num_);
2698 return false;
2699 }
2700
2701 write_threshold_ = write_threshold;
2702 }
2703
2704 if (segment_size != -1) {
2705 // Segments must be aligned to the disk's sector size.
2706 if (segment_size % kSectorSize != 0) {
2707 logprintf(0, "Process Error: Segment size must be a multiple of %d"
2708 " (thread %d).\n", kSectorSize, thread_num_);
2709 return false;
2710 }
2711
2712 segment_size_ = segment_size / kSectorSize;
2713 }
2714
2715 non_destructive_ = non_destructive;
2716
2717 // Having a queue of 150% of blocks that will fit in the disk's cache
2718 // should be enough to force out the oldest block before it is read, hence
2719 // making sure the data comes from the disk and not the cache.
2720 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
2721 // Updating DiskBlockTable parameters
2722 if (update_block_table_) {
2723 block_table_->SetParameters(kSectorSize, write_block_size_,
2724 device_sectors_, segment_size_,
2725 device_name_);
2726 }
2727 return true;
2728 }
2729
2730 // Open a device, return false on failure.
2731 bool DiskThread::OpenDevice(int *pfile) {
2732 int flags = O_RDWR | O_SYNC | O_LARGEFILE;
2733 int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
2734 if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
2735 fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
2736 os_->ActivateFlushPageCache();
2737 }
2738 if (fd < 0) {
2739 logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
2740 device_name_.c_str(), thread_num_);
2741 return false;
2742 }
2743 *pfile = fd;
2744
2745 return GetDiskSize(fd);
2746 }
2747
2748 // Retrieves the size (in bytes) of the disk/file.
2749 // Return false on failure.
2750 bool DiskThread::GetDiskSize(int fd) {
2751 struct stat device_stat;
2752 if (fstat(fd, &device_stat) == -1) {
2753 logprintf(0, "Process Error: Unable to fstat disk %s (thread %d).\n",
2754 device_name_.c_str(), thread_num_);
2755 return false;
2756 }
2757
2758 // For a block device, an ioctl is needed to get the size since the size
2759 // of the device file (i.e. /dev/sdb) is 0.
2760 if (S_ISBLK(device_stat.st_mode)) {
2761 uint64 block_size = 0;
2762
2763 if (ioctl(fd, BLKGETSIZE64, &block_size) == -1) {
2764 logprintf(0, "Process Error: Unable to ioctl disk %s (thread %d).\n",
2765 device_name_.c_str(), thread_num_);
2766 return false;
2767 }
2768
2769 // Zero size indicates a nonworking device.
2770 if (block_size == 0) {
2771 os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1);
2772 ++errorcount_;
2773 status_ = true; // Avoid a procedural error.
2774 return false;
2775 }
2776
2777 device_sectors_ = block_size / kSectorSize;
2778
2779 } else if (S_ISREG(device_stat.st_mode)) {
2780 device_sectors_ = device_stat.st_size / kSectorSize;
2781
2782 } else {
2783 logprintf(0, "Process Error: %s is not a regular file or block "
2784 "device (thread %d).\n", device_name_.c_str(),
2785 thread_num_);
2786 return false;
2787 }
2788
2789 logprintf(12, "Log: Device sectors: %lld on disk %s (thread %d).\n",
2790 device_sectors_, device_name_.c_str(), thread_num_);
2791
2792 if (update_block_table_) {
2793 block_table_->SetParameters(kSectorSize, write_block_size_,
2794 device_sectors_, segment_size_,
2795 device_name_);
2796 }
2797
2798 return true;
2799 }
2800
2801 bool DiskThread::CloseDevice(int fd) {
2802 close(fd);
2803 return true;
2804 }
2805
2806 // Return the time in microseconds.
2807 int64 DiskThread::GetTime() {
2808 struct timeval tv;
2809 gettimeofday(&tv, NULL);
2810 return tv.tv_sec * 1000000 + tv.tv_usec;
2811 }
2812
2813 // Do randomized reads and (possibly) writes on a device.
2814 // Return false on fatal SW error, true on SW success,
2815 // regardless of whether HW failed.
2816 bool DiskThread::DoWork(int fd) {
2817 int64 block_num = 0;
2818 int64 num_segments;
2819
2820 if (segment_size_ == -1) {
2821 num_segments = 1;
2822 } else {
2823 num_segments = device_sectors_ / segment_size_;
2824 if (device_sectors_ % segment_size_ != 0)
2825 num_segments++;
2826 }
2827
2828 // Disk size should be at least 3x cache size. See comment later for
2829 // details.
2830 sat_assert(device_sectors_ * kSectorSize > 3 * cache_size_);
2831
2832 // This disk test works by writing blocks with a certain pattern to
2833 // disk, then reading them back and verifying it against the pattern
2834 // at a later time. A failure happens when either the block cannot
2835 // be written/read or when the read block is different than what was
2836 // written. If a block takes too long to write/read, then a warning
2837 // is given instead of an error since taking too long is not
2838 // necessarily an error.
2839 //
2840 // To prevent the read blocks from coming from the disk cache,
2841 // enough blocks are written before read such that a block would
2842 // be ejected from the disk cache by the time it is read.
2843 //
2844 // TODO(amistry): Implement some sort of read/write throttling. The
2845 // flood of asynchronous I/O requests when a drive is
2846 // unplugged is causing the application and kernel to
2847 // become unresponsive.
2848
2849 while (IsReadyToRun()) {
2850 // Write blocks to disk.
2851 logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n",
2852 non_destructive_ ? "(disabled) " : "",
2853 device_name_.c_str(), thread_num_);
2854 while (IsReadyToRunNoPause() &&
2855 in_flight_sectors_.size() <
2856 static_cast<size_t>(queue_size_ + 1)) {
2857 // Confine testing to a particular segment of the disk.
2858 int64 segment = (block_num / blocks_per_segment_) % num_segments;
2859 if (!non_destructive_ &&
2860 (block_num % blocks_per_segment_ == 0)) {
2861 logprintf(20, "Log: Starting to write segment %lld out of "
2862 "%lld on disk %s (thread %d).\n",
2863 segment, num_segments, device_name_.c_str(),
2864 thread_num_);
2865 }
2866 block_num++;
2867
2868 BlockData *block = block_table_->GetUnusedBlock(segment);
2869
2870 // If an unused sequence of sectors could not be found, skip to the
2871 // next block to process. Soon, a new segment will come and new
2872 // sectors will be able to be allocated. This effectively puts a
2873 // minimum on the disk size of 3x the stated cache size, or 48MiB
2874 // if a cache size is not given (since the cache is set to 16MiB
2875 // by default). Given that today's caches are in the low MiB range
2876 // and drive sizes in the mid GB, this shouldn't pose a problem.
2877 // The 3x minimum comes from the following:
2878 // 1. In order to allocate 'y' blocks from a segment, the
2879 // segment must contain at least 2y blocks or else an
2880 // allocation may not succeed.
2881 // 2. Assume the entire disk is one segment.
2882 // 3. A full write phase consists of writing blocks corresponding to
2883 // 3/2 cache size.
2884 // 4. Therefore, the one segment must have 2 * 3/2 * cache
2885 // size worth of blocks = 3 * cache size worth of blocks
2886 // to complete.
2887 // In non-destructive mode, don't write anything to disk.
2888 if (!non_destructive_) {
2889 if (!WriteBlockToDisk(fd, block)) {
2890 block_table_->RemoveBlock(block);
2891 return true;
2892 }
2893 blocks_written_++;
2894 }
2895
2896 // The block is initialized either by writing it or, in the non-destructive
2897 // case, by being added into the data structure for later reading.
2898 block->initialized();
2899
2900 in_flight_sectors_.push(block);
2901 }
2902 if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
2903 return false;
2904
2905 // Verify blocks on disk.
2906 logprintf(20, "Log: Read phase for disk %s (thread %d).\n",
2907 device_name_.c_str(), thread_num_);
2908 while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) {
2909 BlockData *block = in_flight_sectors_.front();
2910 in_flight_sectors_.pop();
2911 if (!ValidateBlockOnDisk(fd, block))
2912 return true;
2913 block_table_->RemoveBlock(block);
2914 blocks_read_++;
2915 }
2916 }
2917
2918 pages_copied_ = blocks_written_ + blocks_read_;
2919 return true;
2920 }
2921
2922 // Do an asynchronous disk I/O operation.
2923 // Return false if the IO is not set up.
2924 bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
2925 int64 offset, int64 timeout) {
2926 #ifdef HAVE_LIBAIO_H
2927 // Use the Linux native asynchronous I/O interface for reading/writing.
2928 // A read/write consists of three basic steps:
2929 // 1. create an io context.
2930 // 2. prepare and submit an io request to the context
2931 // 3. wait for an event on the context.
2932
2933 struct {
2934 const int opcode;
2935 const char *op_str;
2936 const char *error_str;
2937 } operations[2] = {
2938 { IO_CMD_PREAD, "read", "disk-read-error" },
2939 { IO_CMD_PWRITE, "write", "disk-write-error" }
2940 };
2941
2942 struct iocb cb;
2943 memset(&cb, 0, sizeof(cb));
2944
2945 cb.aio_fildes = fd;
2946 cb.aio_lio_opcode = operations[op].opcode;
2947 cb.u.c.buf = buf;
2948 cb.u.c.nbytes = size;
2949 cb.u.c.offset = offset;
2950
2951 struct iocb *cbs[] = { &cb };
2952 if (io_submit(aio_ctx_, 1, cbs) != 1) {
2953 int error = errno;
2954 char buf[256];
2955 sat_strerror(error, buf, sizeof(buf));
2956 logprintf(0, "Process Error: Unable to submit async %s "
2957 "on disk %s (thread %d). Error %d, %s\n",
2958 operations[op].op_str, device_name_.c_str(),
2959 thread_num_, error, buf);
2960 return false;
2961 }
2962
2963 struct io_event event;
2964 memset(&event, 0, sizeof(event));
2965 struct timespec tv;
2966 tv.tv_sec = timeout / 1000000;
2967 tv.tv_nsec = (timeout % 1000000) * 1000;
2968 if (io_getevents(aio_ctx_, 1, 1, &event, &tv) != 1) {
2969 // A ctrl-c from the keyboard will cause io_getevents to fail with an
2970 // EINTR error code. This is not an error and so don't treat it as such,
2971 // but still log it.
2972 int error = errno;
2973 if (error == EINTR) {
2974 logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n",
2975 operations[op].op_str, device_name_.c_str(),
2976 thread_num_);
2977 } else {
2978 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
2979 errorcount_ += 1;
2980 logprintf(0, "Hardware Error: Timeout doing async %s to sectors "
2981 "starting at %lld on disk %s (thread %d).\n",
2982 operations[op].op_str, offset / kSectorSize,
2983 device_name_.c_str(), thread_num_);
2984 }
2985
2986 // Don't bother checking return codes since io_cancel seems to always fail.
2987 // Since io_cancel is always failing, destroying and recreating an I/O
2988 // context is a workaround for canceling an in-progress I/O operation.
2989 // TODO(amistry): Find out why io_cancel isn't working and make it work.
2990 io_cancel(aio_ctx_, &cb, &event);
2991 io_destroy(aio_ctx_);
2992 aio_ctx_ = 0;
2993 if (io_setup(5, &aio_ctx_)) {
2994 int error = errno;
2995 char buf[256];
2996 sat_strerror(error, buf, sizeof(buf));
2997 logprintf(0, "Process Error: Unable to create aio context on disk %s"
2998 " (thread %d) Error %d, %s\n",
2999 device_name_.c_str(), thread_num_, error, buf);
3000 }
3001
3002 return false;
3003 }
3004
3005 // event.res contains the number of bytes written/read, or a negative
3006 // error code.
3007 if (event.res != static_cast<uint64>(size)) {
3008 errorcount_++;
3009 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
3010
3011 int64 result = static_cast<int64>(event.res);
3012 if (result < 0) {
3013 switch (result) {
3014 case -EIO:
3015 logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
3016 "sectors starting at %lld on disk %s (thread %d).\n",
3017 operations[op].op_str, offset / kSectorSize,
3018 device_name_.c_str(), thread_num_);
3019 break;
3020 default:
3021 logprintf(0, "Hardware Error: Unknown error while doing %s to "
3022 "sectors starting at %lld on disk %s (thread %d).\n",
3023 operations[op].op_str, offset / kSectorSize,
3024 device_name_.c_str(), thread_num_);
3025 }
3026 } else {
3027 logprintf(0, "Hardware Error: Unable to %s to sectors starting at "
3028 "%lld on disk %s (thread %d).\n",
3029 operations[op].op_str, offset / kSectorSize,
3030 device_name_.c_str(), thread_num_);
3031 }
3032 return false;
3033 }
3034
3035 return true;
3036 #else // !HAVE_LIBAIO_H
3037 return false;
3038 #endif
3039 }
3040
3041 // Write a block to disk.
3042 // Return false if the block is not written.
3043 bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
3044 memset(block_buffer_, 0, block->size());
3045
3046 // Fill block buffer with a pattern
3047 struct page_entry pe;
3048 if (!sat_->GetValid(&pe)) {
3049 // Even though a valid page could not be obtained, it is not an error
3050 // since we can always fill in a pattern directly, albeit slower.
3051 unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
3052 block->set_pattern(patternlist_->GetRandomPattern());
3053
3054 logprintf(11, "Log: Warning, using pattern fill fallback in "
3055 "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
3056 device_name_.c_str(), thread_num_);
3057
3058 for (unsigned int i = 0; i < block->size()/wordsize_; i++) {
3059 memblock[i] = block->pattern()->pattern(i);
3060 }
3061 } else {
3062 memcpy(block_buffer_, pe.addr, block->size());
3063 block->set_pattern(pe.pattern);
3064 sat_->PutValid(&pe);
3065 }
3066
3067 logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
3068 " (thread %d).\n",
3069 block->size()/kSectorSize, block->address(),
3070 device_name_.c_str(), thread_num_);
3071
3072 int64 start_time = GetTime();
3073
3074 if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(),
3075 block->address() * kSectorSize, write_timeout_)) {
3076 return false;
3077 }
3078
3079 int64 end_time = GetTime();
3080 logprintf(12, "Log: Writing time: %lld us (thread %d).\n",
3081 end_time - start_time, thread_num_);
3082 if (end_time - start_time > write_threshold_) {
3083 logprintf(5, "Log: Write took %lld us which is longer than threshold "
3084 "%lld us on disk %s (thread %d).\n",
3085 end_time - start_time, write_threshold_, device_name_.c_str(),
3086 thread_num_);
3087 }
3088
3089 return true;
3090 }
3091
3092 // Verify a block on disk.
3093 // Return true if the block was read, also increment errorcount
3094 // if the block had data errors or performance problems.
3095 bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
3096 int64 blocks = block->size() / read_block_size_;
3097 int64 bytes_read = 0;
3098 int64 current_blocks;
3099 int64 current_bytes;
3100 uint64 address = block->address();
3101
3102 logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
3103 "(thread %d).\n",
3104 address, device_name_.c_str(), thread_num_);
3105
3106 // Read block from disk and time the read. If it takes longer than the
3107 // threshold, complain.
3108 if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) {
3109 logprintf(0, "Process Error: Unable to seek to sector %lld in "
3110 "DiskThread::ValidateSectorsOnDisk on disk %s "
3111 "(thread %d).\n", address, device_name_.c_str(), thread_num_);
3112 return false;
3113 }
3114 int64 start_time = GetTime();
3115
3116 // Split a large write-sized block into small read-sized blocks and
3117 // read them in groups of randomly-sized multiples of read block size.
3118 // This ensures all data written on disk by this particular block
3119 // will be tested using a random reading pattern.
3120 while (blocks != 0) {
3121 // Test all read blocks in a written block.
3122 current_blocks = (random() % blocks) + 1;
3123 current_bytes = current_blocks * read_block_size_;
3124
3125 memset(block_buffer_, 0, current_bytes);
3126
3127 logprintf(20, "Log: Reading %lld sectors starting at sector %lld on "
3128 "disk %s (thread %d)\n",
3129 current_bytes / kSectorSize,
3130 (address * kSectorSize + bytes_read) / kSectorSize,
3131 device_name_.c_str(), thread_num_);
3132
3133 if (!AsyncDiskIO(ASYNC_IO_READ, fd, block_buffer_, current_bytes,
3134 address * kSectorSize + bytes_read,
3135 write_timeout_)) {
3136 return false;
3137 }
3138
3139 int64 end_time = GetTime();
3140 logprintf(20, "Log: Reading time: %lld us (thread %d).\n",
3141 end_time - start_time, thread_num_);
3142 if (end_time - start_time > read_threshold_) {
3143 logprintf(5, "Log: Read took %lld us which is longer than threshold "
3144 "%lld us on disk %s (thread %d).\n",
3145 end_time - start_time, read_threshold_,
3146 device_name_.c_str(), thread_num_);
3147 }
3148
3149 // In non-destructive mode, don't compare the block to the pattern since
3150 // the block was never written to disk in the first place.
3151 if (!non_destructive_) {
3152 if (CheckRegion(block_buffer_, block->pattern(), current_bytes,
3153 0, bytes_read)) {
3154 os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
3155 errorcount_ += 1;
3156 logprintf(0, "Hardware Error: Pattern mismatch in block starting at "
3157 "sector %lld in DiskThread::ValidateSectorsOnDisk on "
3158 "disk %s (thread %d).\n",
3159 address, device_name_.c_str(), thread_num_);
3160 }
3161 }
3162
3163 bytes_read += current_blocks * read_block_size_;
3164 blocks -= current_blocks;
3165 }
3166
3167 return true;
3168 }
3169
3170 // Direct device access thread.
3171 // Return false on software error.
3172 bool DiskThread::Work() {
3173 int fd;
3174
3175 logprintf(9, "Log: Starting disk thread %d, disk %s\n",
3176 thread_num_, device_name_.c_str());
3177
3178 srandom(time(NULL));
3179
3180 if (!OpenDevice(&fd)) {
3181 status_ = false;
3182 return false;
3183 }
3184
3185 // Allocate a block buffer aligned to 512 bytes since the kernel requires it
3186 // when using direct IO.
3187 #ifdef HAVE_POSIX_MEMALIGN
3188 int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
3189 sat_->page_length());
3190 #else
3191 block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
3192 int memalign_result = (block_buffer_ == 0);
3193 #endif
3194 if (memalign_result) {
3195 CloseDevice(fd);
3196 logprintf(0, "Process Error: Unable to allocate memory for buffers "
3197 "for disk %s (thread %d) posix memalign returned %d.\n",
3198 device_name_.c_str(), thread_num_, memalign_result);
3199 status_ = false;
3200 return false;
3201 }
3202
3203 #ifdef HAVE_LIBAIO_H
3204 if (io_setup(5, &aio_ctx_)) {
3205 CloseDevice(fd);
3206 logprintf(0, "Process Error: Unable to create aio context for disk %s"
3207 " (thread %d).\n",
3208 device_name_.c_str(), thread_num_);
3209 status_ = false;
3210 return false;
3211 }
3212 #endif
3213
3214 bool result = DoWork(fd);
3215
3216 status_ = result;
3217
3218 #ifdef HAVE_LIBAIO_H
3219 io_destroy(aio_ctx_);
3220 #endif
3221 CloseDevice(fd);
3222
3223 logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, "
3224 "%d pages copied\n",
3225 thread_num_, device_name_.c_str(), status_, pages_copied_);
3226 return result;
3227 }
3228
3229 RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
3230 : DiskThread(block_table) {
3231 update_block_table_ = 0;
3232 }
3233
3234 RandomDiskThread::~RandomDiskThread() {
3235 }
3236
3237 // Workload for random disk thread.
3238 bool RandomDiskThread::DoWork(int fd) {
3239 logprintf(11, "Log: Random phase for disk %s (thread %d).\n",
3240 device_name_.c_str(), thread_num_);
3241 while (IsReadyToRun()) {
3242 BlockData *block = block_table_->GetRandomBlock();
3243 if (block == NULL) {
3244 logprintf(12, "Log: No block available for device %s (thread %d).\n",
3245 device_name_.c_str(), thread_num_);
3246 } else {
3247 ValidateBlockOnDisk(fd, block);
3248 block_table_->ReleaseBlock(block);
3249 blocks_read_++;
3250 }
3251 }
3252 pages_copied_ = blocks_read_;
3253 return true;
3254 }
3255
3256 MemoryRegionThread::MemoryRegionThread() {
3257 error_injection_ = false;
3258 pages_ = NULL;
3259 }
3260
3261 MemoryRegionThread::~MemoryRegionThread() {
3262 if (pages_ != NULL)
3263 delete pages_;
3264 }
3265
3266 // Set a region of memory or MMIO to be tested.
3267 // Return false if region could not be mapped.
3268 bool MemoryRegionThread::SetRegion(void *region, int64 size) {
3269 int plength = sat_->page_length();
3270 int npages = size / plength;
3271 if (size % plength) {
3272 logprintf(0, "Process Error: region size is not a multiple of SAT "
3273 "page length\n");
3274 return false;
3275 } else {
3276 if (pages_ != NULL)
3277 delete pages_;
3278 pages_ = new PageEntryQueue(npages);
3279 char *base_addr = reinterpret_cast<char*>(region);
3280 region_ = base_addr;
3281 for (int i = 0; i < npages; i++) {
3282 struct page_entry pe;
3283 init_pe(&pe);
3284 pe.addr = reinterpret_cast<void*>(base_addr + i * plength);
3285 pe.offset = i * plength;
3286
3287 pages_->Push(&pe);
3288 }
3289 return true;
3290 }
3291 }
3292
3293 // More detailed error printout for hardware errors in memory or MMIO
3294 // regions.
3295 void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
3296 int priority,
3297 const char *message) {
3298 uint32 buffer_offset;
3299 if (phase_ == kPhaseCopy) {
3300 // If the error occurred during the Copy Phase, it means that
3301 // the source data (i.e., the main memory) is wrong, so
3302 // just pass it to the original ProcessError to report a
3303 // bad-dimm error.
3304 WorkerThread::ProcessError(error, priority, message);
3305 } else if (phase_ == kPhaseCheck) {
3306 // An error during the Check Phase means that the memory region
3307 // under test has an error. Gather more information and then report
3308 // the error.
3309 // Determine if this is a write or read error.
3310 os_->Flush(error->vaddr);
3311 error->reread = *(error->vaddr);
3312 char *good = reinterpret_cast<char*>(&(error->expected));
3313 char *bad = reinterpret_cast<char*>(&(error->actual));
3314 sat_assert(error->expected != error->actual);
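    // Scan the word byte by byte, in memory order, to find the first byte
    // that differs; this pinpoints the failing byte address rather than
    // just the failing word.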
3315 unsigned int offset = 0;
3316 for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
3317 if (good[offset] != bad[offset])
3318 break;
3319 }
3320
3321 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;
3322
3323 buffer_offset = error->vbyteaddr - region_;
3324
3325 // Find physical address if possible.
3326 error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
3327 logprintf(priority,
3328 "%s: miscompare on %s, CRC check at %p(0x%llx), "
3329 "offset %llx: read:0x%016llx, reread:0x%016llx "
3330 "expected:0x%016llx\n",
3331 message,
3332 identifier_.c_str(),
3333 error->vaddr,
3334 error->paddr,
3335 buffer_offset,
3336 error->actual,
3337 error->reread,
3338 error->expected);
3339 } else {
3340 logprintf(0, "Process Error: memory region thread raised an "
3341 "unexpected error.");
3342 }
3343 }
3344
3345 // Workload for testing memory or MMIO regions.
3346 // Return false on software error.
3347 bool MemoryRegionThread::Work() {
3348 struct page_entry source_pe;
3349 struct page_entry memregion_pe;
3350 bool result = true;
3351 int64 loops = 0;
3352 const uint64 error_constant = 0x00ba00000000ba00LL;
3353
3354 // For error injection.
3355 int64 *addr = 0x0;
3356 int offset = 0;
3357 int64 data = 0;
3358
3359 logprintf(9, "Log: Starting Memory Region thread %d\n", thread_num_);
3360
3361 while (IsReadyToRun()) {
3362 // Getting pages from SAT and queue.
3363 phase_ = kPhaseNoPhase;
3364 result = result && sat_->GetValid(&source_pe);
3365 if (!result) {
3366 logprintf(0, "Process Error: memory region thread failed to pop "
3367 "pages from SAT, bailing\n");
3368 break;
3369 }
3370
3371 result = result && pages_->PopRandom(&memregion_pe);
3372 if (!result) {
3373 logprintf(0, "Process Error: memory region thread failed to pop "
3374 "pages from queue, bailing\n");
3375 break;
3376 }
3377
3378 // Error injection for CRC copy.
3379 if ((sat_->error_injection() || error_injection_) && loops == 1) {
3380 addr = reinterpret_cast<int64*>(source_pe.addr);
3381 offset = random() % (sat_->page_length() / wordsize_);
3382 data = addr[offset];
3383 addr[offset] = error_constant;
3384 }
3385
3386 // Copying SAT page into memory region.
3387 phase_ = kPhaseCopy;
3388 CrcCopyPage(&memregion_pe, &source_pe);
3389 memregion_pe.pattern = source_pe.pattern;
3390
3391 // Error injection for CRC Check.
3392 if ((sat_->error_injection() || error_injection_) && loops == 2) {
3393 addr = reinterpret_cast<int64*>(memregion_pe.addr);
3394 offset = random() % (sat_->page_length() / wordsize_);
3395 data = addr[offset];
3396 addr[offset] = error_constant;
3397 }
3398
3399 // Checking page content in memory region.
3400 phase_ = kPhaseCheck;
3401 CrcCheckPage(&memregion_pe);
3402
3403 phase_ = kPhaseNoPhase;
3404 // Storing pages on their proper queues.
3405 result = result && sat_->PutValid(&source_pe);
3406 if (!result) {
3407 logprintf(0, "Process Error: memory region thread failed to push "
3408 "pages into SAT, bailing\n");
3409 break;
3410 }
3411 result = result && pages_->Push(&memregion_pe);
3412 if (!result) {
3413 logprintf(0, "Process Error: memory region thread failed to push "
3414 "pages into queue, bailing\n");
3415 break;
3416 }
3417
3418 if ((sat_->error_injection() || error_injection_) &&
3419 loops >= 1 && loops <= 2) {
3420 addr[offset] = data;
3421 }
3422
3423 loops++;
3424 YieldSelf();
3425 }
3426
3427 pages_copied_ = loops;
3428 status_ = result;
3429 logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d "
3430 "pages checked\n", thread_num_, status_, pages_copied_);
3431 return result;
3432 }
3433
3434 // The list of MSRs to read from each cpu.
3435 const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = {
3436 { kMsrTscAddr, "TSC" },
3437 { kMsrAperfAddr, "APERF" },
3438 { kMsrMperfAddr, "MPERF" },
3439 };
3440
3441 CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round)
3442 : num_cpus_(num_cpus),
3443 freq_threshold_(freq_threshold),
3444 round_(round) {
3445 sat_assert(round >= 0);
3446 if (round == 0) {
3447 // If rounding is off, force rounding to the nearest MHz.
3448 round_ = 1;
3449 round_value_ = 0.5;
3450 } else {
3451 round_value_ = round/2.0;
3452 }
3453 }
3454
3455 CpuFreqThread::~CpuFreqThread() {
3456 }
3457
3458 // Compute the difference between the currently read MSR values and the
3459 // previously read values and store the results in delta. If any of the
3460 // values did not increase, or the TSC value is too small, returns false.
3461 // Otherwise, returns true.
3462 bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous,
3463 CpuDataType *delta) {
3464 // Loop through the msrs.
3465 for (int msr = 0; msr < kMsrLast; msr++) {
3466 if (previous->msrs[msr] > current->msrs[msr]) {
3467 logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx "
3468 "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr],
3469 current->msrs[msr]);
3470 return false;
3471 } else {
3472 delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr];
3473 }
3474 }
3475
3476 // Check for TSC < 1 Mcycles over interval.
3477 if (delta->msrs[kMsrTsc] < (1000 * 1000)) {
3478 logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n");
3479 return false;
3480 }
3481 timersub(&current->tv, &previous->tv, &delta->tv);
3482
3483 return true;
3484 }
3485
3486 // Compute the change in values of the MSRs between current and previous,
3487 // set the frequency in MHz of the cpu. If there is an error computing
3488 // the delta, return false. Otherwise, return true.
3489 bool CpuFreqThread::ComputeFrequency(CpuDataType *current,
3490 CpuDataType *previous, int *freq) {
3491 CpuDataType delta;
3492 if (!ComputeDelta(current, previous, &delta)) {
3493 return false;
3494 }
3495
3496 double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0;
3497 double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000
3498 * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval;
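  // Reading the formula: TSC/1e6 is the reference clock in Mcycles,
  // APERF/MPERF scales it by the ratio of actual to reference cycles
  // over the interval, and dividing by the elapsed seconds yields MHz.
  // E.g. a 2.4e9 TSC delta over 1.0s with APERF == MPERF gives 2400 MHz.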
3499
3500 // Use the rounding value to round up properly.
3501 int computed = static_cast<int>(frequency + round_value_);
3502 *freq = computed - (computed % round_);
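  // Rounding example: with round_ == 100 (round_value_ == 50), a raw
  // frequency of 2451.3 MHz gives computed == 2501, and subtracting
  // computed % round_ snaps the result to 2500 MHz.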
3503 return true;
3504 }
3505
3506 // This is the task function that the thread executes.
3507 bool CpuFreqThread::Work() {
3508 cpu_set_t cpuset;
3509 if (!AvailableCpus(&cpuset)) {
3510 logprintf(0, "Process Error: Cannot get information about the cpus.\n");
3511 return false;
3512 }
3513
3514 // Start off indicating the test is passing.
3515 status_ = true;
3516
3517 int curr = 0;
3518 int prev = 1;
3519 uint32 num_intervals = 0;
3520 bool paused = false;
3521 bool valid;
3522 bool pass = true;
3523
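// Two sample buffers alternate between holding the current and previous
// MSR readings; the curr and prev indices are flipped after each interval
// instead of copying the data.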
3524 vector<CpuDataType> data[2];
3525 data[0].resize(num_cpus_);
3526 data[1].resize(num_cpus_);
3527 while (IsReadyToRun(&paused)) {
3528 if (paused) {
3529 // Reset the intervals and restart logic after the pause.
3530 num_intervals = 0;
3531 }
3532 if (num_intervals == 0) {
3533 // If this is the first interval, then always wait a bit before
3534 // starting to collect data.
3535 sat_sleep(kStartupDelay);
3536 }
3537
3538 // Get the per cpu counters.
3539 valid = true;
3540 for (int cpu = 0; cpu < num_cpus_; cpu++) {
3541 if (CPU_ISSET(cpu, &cpuset)) {
3542 if (!GetMsrs(cpu, &data[curr][cpu])) {
3543 logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
3544 valid = false;
3545 break;
3546 }
3547 }
3548 }
3549 if (!valid) {
3550 // Reset the number of collected intervals since something bad happened.
3551 num_intervals = 0;
3552 continue;
3553 }
3554
3555 num_intervals++;
3556
3557 // Only compute deltas once more than two samples have been collected, so the first interval after startup or a reset is discarded.
3558 if (num_intervals > 2) {
3559 for (int cpu = 0; cpu < num_cpus_; cpu++) {
3560 if (CPU_ISSET(cpu, &cpuset)) {
3561 int freq;
3562 if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
3563 &freq)) {
3564 // Reset the number of collected intervals since an unknown
3565 // error occurred.
3566 logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
3567 num_intervals = 0;
3568 break;
3569 }
3570 logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
3571 if (freq < freq_threshold_) {
3572 errorcount_++;
3573 pass = false;
3574 logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
3575 "threshold %d MHz.\n", cpu, freq, freq_threshold_);
3576 }
3577 }
3578 }
3579 }
3580
3581 sat_sleep(kIntervalPause);
3582
3583 // Swap the values in curr and prev (these values flip between 0 and 1).
3584 curr ^= 1;
3585 prev ^= 1;
3586 }
3587
3588 return pass;
3589 }
3590
3591
3592 // Get the MSR values for this particular cpu and save them in data. If
3593 // any error is encountered, returns false. Otherwise, returns true.
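// Note: os_->ReadMSR is assumed to go through the kernel's MSR interface
// (e.g. /dev/cpu/<n>/msr), in which case it requires the msr module to be
// loaded and sufficient privileges to read.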
3594 bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
3595 for (int msr = 0; msr < kMsrLast; msr++) {
3596 if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) {
3597 return false;
3598 }
3599 }
3600 // Save the time at which we acquired these values.
3601 gettimeofday(&data->tv, NULL);
3602
3603 return true;
3604 }
3605
3606 // Returns true if this test can run on the current machine. Otherwise,
3607 // returns false.
3608 bool CpuFreqThread::CanRun() {
3609 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
3610 unsigned int eax, ebx, ecx, edx;
3611
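// cpuid() is a helper defined elsewhere in this codebase; from its use
// here it is assumed to execute the CPUID instruction with EAX preloaded
// from *eax and to write the resulting EAX/EBX/ECX/EDX back through the
// four pointers.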
3612 // TSC support is advertised by CPUID.EAX=0x1: EDX.bit4.
3613 // This check is valid for both Intel and AMD.
3614 eax = 1;
3615 cpuid(&eax, &ebx, &ecx, &edx);
3616 if (!(edx & (1 << 4))) {
3617 logprintf(0, "Process Error: No TSC support.\n");
3618 return false;
3619 }
3620
3621 // Check the highest extended function level supported.
3622 // This check is valid for both Intel and AMD.
3623 eax = 0x80000000;
3624 cpuid(&eax, &ebx, &ecx, &edx);
3625 if (eax < 0x80000007) {
3626 logprintf(0, "Process Error: No invariant TSC support.\n");
3627 return false;
3628 }
3629
3630 // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3631 // This check is valid for both Intel and AMD.
3632 eax = 0x80000007;
3633 cpuid(&eax, &ebx, &ecx, &edx);
3634 if ((edx & (1 << 8)) == 0) {
3635 logprintf(0, "Process Error: No non-stop TSC support.\n");
3636 return false;
3637 }
3638
3639 // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3640 // This check is valid for both Intel and AMD.
3641 eax = 0x6;
3642 cpuid(&eax, &ebx, &ecx, &edx);
3643 if ((ecx & 1) == 0) {
3644 logprintf(0, "Process Error: No APERF MSR support.\n");
3645 return false;
3646 }
3647 return true;
3648 #else
3649 logprintf(0, "Process Error: "
3650 "cpu_freq_test is only supported on X86 processors.\n");
3651 return false;
3652 #endif
3653 }
3654