1 // Copyright 2006 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 // worker.cc : individual tasks that can be run in combination to
29 #include <sys/select.h>
31 #include <sys/types.h>
32 #include <sys/times.h>
34 // These are necessary, but on by default
36 // #define __USE_LARGEFILE64
38 #include <sys/socket.h>
40 #include <arpa/inet.h>
41 #include <linux/unistd.h> // for gettid
43 // For size of block device
44 #include <sys/ioctl.h>
46 // For asynchronous I/O
47 #include <linux/aio_abi.h>
49 #include <sys/syscall.h>
54 // This file must work with autoconf on its public version,
55 // so these includes are correct.
56 #include "error_diag.h" // NOLINT
57 #include "os.h" // NOLINT
58 #include "pattern.h" // NOLINT
59 #include "queue.h" // NOLINT
60 #include "sat.h" // NOLINT
61 #include "sattypes.h" // NOLINT
62 #include "worker.h" // NOLINT
// Why ubuntu, do you hate gettid so bad?
// Provide a fallback syscall number and wrapper when glibc doesn't
// expose gettid().  (Listing was truncated: the #endif lines were missing.)
#if !defined(__NR_gettid)
#define __NR_gettid 224
#endif

#define gettid() syscall(__NR_gettid)

// Very old toolchains without CPU_SETSIZE also lack the affinity
// syscall wrappers; declare them via _syscall3 in that case.
#if !defined(CPU_SETSIZE)
_syscall3(int, sched_getaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
_syscall3(int, sched_setaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
#endif
// Linux aio syscalls.
// Fallback syscall numbers for toolchains whose headers don't define them.
// (Listing was truncated: the closing #endif and the io_getevents
// continuation line were missing; the second io_getevents macro parameter
// confusingly shadowed the macro name — renamed to min_nr.)
#if !defined(__NR_io_setup)
#define __NR_io_setup 206
#define __NR_io_destroy 207
#define __NR_io_getevents 208
#define __NR_io_submit 209
#define __NR_io_cancel 210
#endif

#define io_setup(nr_events, ctxp) \
  syscall(__NR_io_setup, (nr_events), (ctxp))
#define io_submit(ctx_id, nr, iocbpp) \
  syscall(__NR_io_submit, (ctx_id), (nr), (iocbpp))
#define io_getevents(ctx_id, min_nr, nr, events, timeout) \
  syscall(__NR_io_getevents, (ctx_id), (min_nr), (nr), (events), (timeout))
#define io_cancel(ctx_id, iocb, result) \
  syscall(__NR_io_cancel, (ctx_id), (iocb), (result))
#define io_destroy(ctx) \
  syscall(__NR_io_destroy, (ctx))
// Get HW core ID from cpuid instruction.
// Returns the initial APIC ID (bits 31:24 of EBX from CPUID leaf 1) on
// x86; returns 0 on unsupported architectures.
inline int apicid(void) {
  int cpu;
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
  __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx");
#else
  // Assume a zero core ID when we cannot query the hardware.
  cpu = 0;
#warning "Unsupported CPU type: unable to determine core ID."
#endif
  return (cpu >> 24);
}
112 // Work around the sad fact that there are two (gnu, xsi) incompatible
113 // versions of strerror_r floating around google. Awesome.
114 bool sat_strerror(int err, char *buf, int len) {
116 char *errmsg = reinterpret_cast<char*>(strerror_r(err, buf, len));
117 int retval = reinterpret_cast<int64>(errmsg);
123 strncpy(buf, errmsg, len);
130 inline uint64 addr_to_tag(void *address) {
131 return reinterpret_cast<uint64>(address);
#if !defined(O_DIRECT)
// Sometimes this isn't available.
// Disregard if it's not defined.
#define O_DIRECT 0
#endif
141 // A struct to hold captured errors, for later reporting.
143 uint64 actual; // This is the actual value read.
144 uint64 reread; // This is the actual value, reread.
145 uint64 expected; // This is what it should have been.
146 uint64 *vaddr; // This is where it was (or wasn't).
147 char *vbyteaddr; // This is byte specific where the data was (or wasn't).
148 uint64 paddr; // This is the bus address, if available.
149 uint64 *tagvaddr; // This holds the tag value if this data was tagged.
150 uint64 tagpaddr; // This holds the physical address corresponding to the tag.
153 // This is a helper function to create new threads with pthreads.
154 static void *ThreadSpawnerGeneric(void *ptr) {
155 WorkerThread *worker = static_cast<WorkerThread*>(ptr);
156 worker->StartRoutine();
161 void WorkerStatus::Initialize() {
162 sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
163 sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
164 sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL,
168 void WorkerStatus::Destroy() {
169 sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_));
170 sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_));
171 sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
174 void WorkerStatus::PauseWorkers() {
175 if (SetStatus(PAUSE) != PAUSE)
176 WaitOnPauseBarrier();
179 void WorkerStatus::ResumeWorkers() {
180 if (SetStatus(RUN) == PAUSE)
181 WaitOnPauseBarrier();
184 void WorkerStatus::StopWorkers() {
185 if (SetStatus(STOP) == PAUSE)
186 WaitOnPauseBarrier();
189 bool WorkerStatus::ContinueRunning() {
190 // This loop is an optimization. We use it to immediately re-check the status
191 // after resuming from a pause, instead of returning and waiting for the next
192 // call to this function.
194 switch (GetStatus()) {
198 // Wait for the other workers to call this function so that
199 // PauseWorkers() can return.
200 WaitOnPauseBarrier();
201 // Wait for ResumeWorkers() to be called.
202 WaitOnPauseBarrier();
210 bool WorkerStatus::ContinueRunningNoPause() {
211 return (GetStatus() != STOP);
214 void WorkerStatus::RemoveSelf() {
215 // Acquire a read lock on status_rwlock_ while (status_ != PAUSE).
217 AcquireStatusReadLock();
218 if (status_ != PAUSE)
220 // We need to obey PauseWorkers() just like ContinueRunning() would, so that
221 // the other threads won't wait on pause_barrier_ forever.
223 // Wait for the other workers to call this function so that PauseWorkers()
225 WaitOnPauseBarrier();
226 // Wait for ResumeWorkers() to be called.
227 WaitOnPauseBarrier();
230 // This lock would be unnecessary if we held a write lock instead of a read
231 // lock on status_rwlock_, but that would also force all threads calling
232 // ContinueRunning() to wait on this one. Using a separate lock avoids that.
233 AcquireNumWorkersLock();
234 // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't
235 // in use because (status != PAUSE).
236 sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
237 sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_));
239 ReleaseNumWorkersLock();
241 // Release status_rwlock_.
246 // Parent thread class.
247 WorkerThread::WorkerThread() {
251 runduration_usec_ = 0;
253 worker_status_ = NULL;
254 thread_spawner_ = &ThreadSpawnerGeneric;
// Trivial destructor: the worker owns no resources requiring cleanup here.
WorkerThread::~WorkerThread() {}
260 // Constructors. Just init some default values.
261 FillThread::FillThread() {
262 num_pages_to_fill_ = 0;
// Initialize file name to empty.
// NOTE(review): the constructor body is not visible in this listing;
// presumably it initializes filename/device members — confirm against the
// complete source before editing.
FileThread::FileThread() {
275 // If file thread used bounce buffer in memory, account for the extra
276 // copy for memory bandwidth calculation.
277 float FileThread::GetMemoryCopiedData() {
278 if (!os_->normal_mem())
279 return GetCopiedData();
284 // Initialize target hostname to be invalid.
285 NetworkThread::NetworkThread() {
286 snprintf(ipaddr_, sizeof(ipaddr_), "Unknown");
291 NetworkSlaveThread::NetworkSlaveThread() {
295 NetworkListenThread::NetworkListenThread() {
298 // Init member variables.
299 void WorkerThread::InitThread(int thread_num_init,
301 class OsLayer *os_init,
302 class PatternList *patternlist_init,
303 WorkerStatus *worker_status) {
304 sat_assert(worker_status);
305 worker_status->AddWorkers(1);
307 thread_num_ = thread_num_init;
310 patternlist_ = patternlist_init;
311 worker_status_ = worker_status;
313 cpu_mask_ = AvailableCpus();
316 tag_mode_ = sat_->tag_mode();
320 // Use pthreads to prioritize a system thread.
321 bool WorkerThread::InitPriority() {
322 // This doesn't affect performance that much, and may not be too safe.
324 bool ret = BindToCpus(cpu_mask_);
326 logprintf(11, "Log: Bind to %x failed.\n", cpu_mask_);
328 logprintf(11, "Log: Thread %d running on apic ID %d mask %x (%x).\n",
329 thread_num_, apicid(), CurrentCpus(), cpu_mask_);
331 if (priority_ == High) {
333 param.sched_priority = 1;
334 // Set the priority; others are unchanged.
335 logprintf(0, "Log: Changing priority to SCHED_FIFO %d\n",
336 param.sched_priority);
337 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
339 sat_strerror(errno, buf, sizeof(buf));
340 logprintf(0, "Process Error: sched_setscheduler "
341 "failed - error %d %s\n",
349 // Use pthreads to create a system thread.
350 int WorkerThread::SpawnThread() {
351 // Create the new thread.
352 int result = pthread_create(&thread_, NULL, thread_spawner_, this);
355 sat_strerror(result, buf, sizeof(buf));
356 logprintf(0, "Process Error: pthread_create "
357 "failed - error %d %s\n", result,
363 // 0 is pthreads success.
367 // Kill the worker thread with SIGINT.
368 int WorkerThread::KillThread() {
369 pthread_kill(thread_, SIGINT);
373 // Block until thread has exited.
374 int WorkerThread::JoinThread() {
375 int result = pthread_join(thread_, NULL);
378 logprintf(0, "Process Error: pthread_join failed - error %d\n", result);
382 // 0 is pthreads success.
// Thread entry point, run on the spawned pthread.
// NOTE(review): interior lines are missing from this listing (presumably
// priority setup and the call to Work()); only the final deregistration
// from worker_status_ is visible — confirm against the complete source.
void WorkerThread::StartRoutine() {
worker_status_->RemoveSelf();
396 // Thread work loop. Execute until marked finished.
397 int WorkerThread::Work() {
399 logprintf(9, "Log: ...\n");
400 // Sleep for 1 second.
402 } while (IsReadyToRun());
408 // Returns CPU mask of CPUs available to this process,
409 // Conceptually, each bit represents a logical CPU, ie:
410 // mask = 3 (11b): cpu0, 1
411 // mask = 13 (1101b): cpu0, 2, 3
412 uint32 WorkerThread::AvailableCpus() {
414 CPU_ZERO(&curr_cpus);
415 sched_getaffinity(getppid(), sizeof(curr_cpus), &curr_cpus);
416 return cpuset_to_uint32(&curr_cpus);
420 // Returns CPU mask of CPUs this thread is bound to,
421 // Conceptually, each bit represents a logical CPU, ie:
422 // mask = 3 (11b): cpu0, 1
423 // mask = 13 (1101b): cpu0, 2, 3
424 uint32 WorkerThread::CurrentCpus() {
426 CPU_ZERO(&curr_cpus);
427 sched_getaffinity(0, sizeof(curr_cpus), &curr_cpus);
428 return cpuset_to_uint32(&curr_cpus);
432 // Bind worker thread to specified CPU(s)
434 // thread_mask: cpu_set_t representing CPUs, ie
435 // mask = 1 (01b): cpu0
436 // mask = 3 (11b): cpu0, 1
437 // mask = 13 (1101b): cpu0, 2, 3
439 // Returns true on success, false otherwise.
440 bool WorkerThread::BindToCpus(uint32 thread_mask) {
441 uint32 process_mask = AvailableCpus();
442 if (thread_mask == process_mask)
445 logprintf(11, "Log: available CPU mask - %x\n", process_mask);
446 if ((thread_mask | process_mask) != process_mask) {
447 // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist.
448 logprintf(0, "Log: requested CPUs %x not a subset of available %x\n",
449 thread_mask, process_mask);
453 cpuset_from_uint32(thread_mask, &cpuset);
454 return (sched_setaffinity(gettid(), sizeof(cpuset), &cpuset) == 0);
458 // A worker thread can yield itself to give up CPU until it's scheduled again.
459 // Returns true on success, false on error.
460 bool WorkerThread::YieldSelf() {
461 return (sched_yield() == 0);
// Fill this page with its pattern.
// NOTE(review): several interior lines are missing from this listing
// (the null check's if/return, the datacast declarations, else branches
// and closing braces) — compare against the complete source.
// Fill this page with its pattern.
bool WorkerThread::FillPage(struct page_entry *pe) {
// Error check arguments.
logprintf(0, "Process Error: Fill Page entry null\n");
// Mask is the bitmask of indexes used by the pattern.
// It is the pattern size -1. Size is always a power of 2.
uint64 *memwords = static_cast<uint64*>(pe->addr);
int length = sat_->page_length();
// Select tag or data as appropriate.
for (int i = 0; i < length / wordsize_; i++) {
// Every 8th word (cache-line aligned) carries the address tag in tag mode.
if ((i & 0x7) == 0) {
data.l64 = addr_to_tag(&memwords[i]);
data.l32.l = pe->pattern->pattern(i << 1);
data.l32.h = pe->pattern->pattern((i << 1) + 1);
memwords[i] = data.l64;
// Just fill in untagged data directly.
for (int i = 0; i < length / wordsize_; i++) {
data.l32.l = pe->pattern->pattern(i << 1);
data.l32.h = pe->pattern->pattern((i << 1) + 1);
memwords[i] = data.l64;
506 // Tell the thread how many pages to fill.
507 void FillThread::SetFillPages(int64 num_pages_to_fill_init) {
508 num_pages_to_fill_ = num_pages_to_fill_init;
511 // Fill this page with a random pattern.
512 bool FillThread::FillPageRandom(struct page_entry *pe) {
513 // Error check arguments.
515 logprintf(0, "Process Error: Fill Page entry null\n");
518 if ((patternlist_ == 0) || (patternlist_->Size() == 0)) {
519 logprintf(0, "Process Error: No data patterns available\n");
523 // Choose a random pattern for this block.
524 pe->pattern = patternlist_->GetRandomPattern();
525 if (pe->pattern == 0) {
526 logprintf(0, "Process Error: Null data pattern\n");
530 // Actually fill the page.
// Memory fill work loop. Execute until allotted pages filled.
// NOTE(review): interior lines are missing from this listing (declarations
// of result/loops, the error-path braces/continues, and the loop closer) —
// compare against the complete source.
int FillThread::Work() {
logprintf(9, "Log: Starting fill thread %d\n", thread_num_);
// We want to fill num_pages_to_fill pages, and
// stop when we've filled that many.
// We also want to capture early break
struct page_entry pe;
// Pop an empty page, fill it, and push it back as valid each iteration.
while (IsReadyToRun() && (loops < num_pages_to_fill_)) {
result &= sat_->GetEmpty(&pe);
logprintf(0, "Process Error: fill_thread failed to pop pages, "
// Fill the page with pattern
result &= FillPageRandom(&pe);
// Put the page back on the queue.
result &= sat_->PutValid(&pe);
logprintf(0, "Process Error: fill_thread failed to push pages, "
// Fill in thread status.
pages_copied_ = loops;
logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n",
thread_num_, status_, pages_copied_);
// Print error information about a data miscompare.
// Rereads the failing address (to distinguish read vs write errors),
// locates the first differing byte, maps it to a physical address/DIMM,
// reports it through the diagnoser, then repairs the word in memory.
// NOTE(review): interior lines are missing from this listing (the priority
// parameter, the logprintf argument list, loop braces) — compare against
// the complete source.
void WorkerThread::ProcessError(struct ErrorRecord *error,
const char *message) {
char dimm_string[256] = "";
int apic_id = apicid();
uint32 cpumask = CurrentCpus();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
error->reread = *(error->vaddr);
char *good = reinterpret_cast<char*>(&(error->expected));
char *bad = reinterpret_cast<char*>(&(error->actual));
sat_assert(error->expected != error->actual);
unsigned int offset = 0;
// Scan byte-by-byte for the first mismatching byte.
for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
if (good[offset] != bad[offset])
error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;
// Find physical address if possible.
error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
// Pretty print DIMM mapping if available.
os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));
// Report parseable error.
// Run miscompare error through diagnoser for logging and reporting.
os_->error_diagnoser_->AddMiscompareError(dimm_string,
reinterpret_cast<uint64>
"%s: miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
"read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
// Overwrite incorrect data with correct data to prevent
// future miscompares when this data is reused.
*(error->vaddr) = error->expected;
os_->Flush(error->vaddr);
// Print error information about a data miscompare.
// Same flow as WorkerThread::ProcessError, but when the miscompare was
// found while verifying data read back from a file (crc_page_ != -1) it is
// reported as an HDD miscompare with src/dst page addresses instead.
// NOTE(review): interior lines are missing from this listing — compare
// against the complete source.
void FileThread::ProcessError(struct ErrorRecord *error,
const char *message) {
char dimm_string[256] = "";
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
error->reread = *(error->vaddr);
char *good = reinterpret_cast<char*>(&(error->expected));
char *bad = reinterpret_cast<char*>(&(error->actual));
sat_assert(error->expected != error->actual);
unsigned int offset = 0;
// Scan byte-by-byte for the first mismatching byte.
for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
if (good[offset] != bad[offset])
error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;
// Find physical address if possible.
error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
// Pretty print DIMM mapping if available.
os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));
// If crc_page_ is valid, ie checking content read back from file,
// track src/dst memory addresses. Otherwise categorize as general
// memory miscompare for CRC checking everywhere else.
if (crc_page_ != -1) {
int miscompare_byteoffset = static_cast<char*>(error->vbyteaddr) -
static_cast<char*>(page_recs_[crc_page_].dst);
os_->error_diagnoser_->AddHDDMiscompareError(devicename_,
miscompare_byteoffset,
page_recs_[crc_page_].src,
page_recs_[crc_page_].dst);
os_->error_diagnoser_->AddMiscompareError(dimm_string,
reinterpret_cast<uint64>
"%s: miscompare on %s at %p(0x%llx:%s): read:0x%016llx, "
"reread:0x%016llx expected:0x%016llx\n",
// Overwrite incorrect data with correct data to prevent
// future miscompares when this data is reused.
*(error->vaddr) = error->expected;
os_->Flush(error->vaddr);
// Do a word by word result check of a region.
// Print errors on mismatches.
// Pass 1 records up to kErrorLimit miscompares; if the queue overflows the
// page is marked a "Page Error".  For whole-page errors it then tries to
// match the bad range against every other known pattern ("Block Error"),
// reports the queued errors, and finally re-walks the region to print the
// overflowed errors directly.
// NOTE(review): many interior lines are missing from this listing
// (parameter list, local declarations such as length/errors/state,
// braces) — compare against the complete source.
int WorkerThread::CheckRegion(void *addr,
class Pattern *pattern,
int64 pattern_offset) {
uint64 *memblock = static_cast<uint64*>(addr);
const int kErrorLimit = 128;
int overflowerrors = 0;  // Count of overflowed errors.
bool page_error = false;
string errormessage("Hardware Error");
recorded[kErrorLimit];  // Queued errors for later printing.
// For each word in the data region.
for (int i = 0; i < length / wordsize_; i++) {
uint64 actual = memblock[i];
// Determine the value that should be there.
int index = 2 * i + pattern_offset;
data.l32.l = pattern->pattern(index);
data.l32.h = pattern->pattern(index + 1);
// Check tags if necessary.
if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
expected = addr_to_tag(&memblock[i]);
// If the value is incorrect, save an error record for later printing.
if (actual != expected) {
if (errors < kErrorLimit) {
recorded[errors].actual = actual;
recorded[errors].expected = expected;
recorded[errors].vaddr = &memblock[i];
// If we have overflowed the error queue, just print the errors now.
logprintf(10, "Log: Error record overflow, too many miscompares!\n");
errormessage = "Page Error";
// Find if this is a whole block corruption.
if (page_error && !tag_mode_) {
int patsize = patternlist_->Size();
for (int pat = 0; pat < patsize; pat++) {
class Pattern *altpattern = patternlist_->GetPattern(pat);
const int kGoodAgain = 2;
const int kNoMatch = 3;
unsigned int badstart = 0;
unsigned int badend = 0;
// Don't match against ourself!
if (pattern == altpattern)
// State machine walk: kGood -> kBad (matches altpattern) -> kGoodAgain.
for (int i = 0; i < length / wordsize_; i++) {
uint64 actual = memblock[i];
// Determine the value that should be there.
int index = 2 * i + pattern_offset;
expected.l32.l = pattern->pattern(index);
expected.l32.h = pattern->pattern(index + 1);
// NOTE(review): 'possible' is computed from pattern->pattern here, which
// makes it identical to 'expected'; it most likely should be derived from
// altpattern->pattern — confirm against the complete source.
possible.l32.l = pattern->pattern(index);
possible.l32.h = pattern->pattern(index + 1);
if (state == kGood) {
if (actual == expected.l64) {
} else if (actual == possible.l64) {
} else if (state == kBad) {
if (actual == possible.l64) {
} else if (actual == expected.l64) {
} else if (state == kGoodAgain) {
if (actual == expected.l64) {
if ((state == kGoodAgain) || (state == kBad)) {
unsigned int blockerrors = badend - badstart + 1;
errormessage = "Block Error";
ProcessError(&recorded[0], 0, errormessage.c_str());
logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
"%d bytes from offset 0x%x to 0x%x\n",
altpattern->name(), pattern->name(),
blockerrors * wordsize_,
offset + badstart * wordsize_,
offset + badend * wordsize_);
errorcount_ += blockerrors;
// Process error queue after all errors have been recorded.
for (int err = 0; err < errors; err++) {
if (errorcount_ + err < 30)
priority = 0;  // Bump up the priority for the first few errors.
ProcessError(&recorded[err], priority, errormessage.c_str());
// For each word in the data region.
int error_recount = 0;
for (int i = 0; i < length / wordsize_; i++) {
uint64 actual = memblock[i];
// Determine the value that should be there.
int index = 2 * i + pattern_offset;
data.l32.l = pattern->pattern(index);
data.l32.h = pattern->pattern(index + 1);
// Check tags if necessary.
if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
expected = addr_to_tag(&memblock[i]);
// If the value is incorrect, save an error record for later printing.
if (actual != expected) {
if (error_recount < kErrorLimit) {
// We already reported these.
// If we have overflowed the error queue, print the errors now.
struct ErrorRecord er;
er.expected = expected;
er.vaddr = &memblock[i];
// Do the error printout. This will take a long time and
// likely change the machine state.
ProcessError(&er, 12, errormessage.c_str());
// Keep track of observed errors.
errorcount_ += errors + overflowerrors;
return errors + overflowerrors;
887 float WorkerThread::GetCopiedData() {
888 return pages_copied_ * sat_->page_length() / kMegabyte;
// Calculate the CRC of a region.
// Result check if the CRC mismatches.
// Computes the Adler checksum of each 4KB block; on mismatch falls back to
// the slow word-by-word CheckRegion to pinpoint the miscompares.
// NOTE(review): interior lines are missing from this listing (the errors
// accumulator, the tag_mode_ if/else around the two checksum calls, the
// CheckRegion argument lists, braces and final return) — compare against
// the complete source.
int WorkerThread::CrcCheckPage(struct page_entry *srcpe) {
const int blocksize = 4096;
const int blockwords = blocksize / wordsize_;
const AdlerChecksum *expectedcrc = srcpe->pattern->crc();
uint64 *memblock = static_cast<uint64*>(srcpe->addr);
int blocks = sat_->page_length() / blocksize;
for (int currentblock = 0; currentblock < blocks; currentblock++) {
uint64 *memslice = memblock + currentblock * blockwords;
// Tag-aware checksum (verifies address tags as it reads).
AdlerAddrCrcC(memslice, blocksize, &crc, srcpe);
// Plain checksum path.
CalculateAdlerChecksum(memslice, blocksize, &crc);
// If the CRC does not match, we'd better look closer.
if (!crc.Equals(*expectedcrc)) {
logprintf(11, "Log: CrcCheckPage Falling through to slow compare, "
"CRC mismatch %s != %s\n",
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
int errorcount = CheckRegion(memslice,
currentblock * blocksize, 0);
if (errorcount == 0) {
logprintf(0, "Log: CrcCheckPage CRC mismatch %s != %s, "
"but no miscompares found.\n",
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
errors += errorcount;
// For odd length transfers, we should never hit this.
int leftovers = sat_->page_length() % blocksize;
uint64 *memslice = memblock + blocks * blockwords;
errors += CheckRegion(memslice,
blocks * blocksize, 0);
// Print error information about a data miscompare.
// Tag-mode variant: the miscompared word is an embedded address tag, so
// both the data address and the tag's own address are resolved to physical
// addresses/DIMMs and logged.
// NOTE(review): interior lines are missing from this listing (the priority
// parameter, read_error assignment, the logprintf call around the format
// strings) — compare against the complete source.
void WorkerThread::ProcessTagError(struct ErrorRecord *error,
const char *message) {
char dimm_string[256] = "";
char tag_dimm_string[256] = "";
bool read_error = false;
int apic_id = apicid();
uint32 cpumask = CurrentCpus();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
error->reread = *(error->vaddr);
// Distinguish read and write errors.
if (error->actual != error->reread) {
sat_assert(error->expected != error->actual);
error->vbyteaddr = reinterpret_cast<char*>(error->vaddr);
// Find physical address if possible.
error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
error->tagpaddr = os_->VirtualToPhysical(error->tagvaddr);
// Pretty print DIMM mapping if available.
os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));
// Pretty print DIMM mapping if available.
os_->FindDimm(error->tagpaddr, tag_dimm_string, sizeof(tag_dimm_string));
// Report parseable error.
"%s: Tag from %p(0x%llx:%s) (%s) "
"miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
"read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
error->tagvaddr, error->tagpaddr,
read_error ? "read error" : "write error",
// Overwrite incorrect data with correct data to prevent
// future miscompares when this data is reused.
*(error->vaddr) = error->expected;
os_->Flush(error->vaddr);
// Print out and log a tag error.
// NOTE(review): the parameter list and most field assignments of the
// ErrorRecord are missing from this listing — compare against the
// complete source.
bool WorkerThread::ReportTagError(
struct ErrorRecord er;
// Generate vaddr from tag.
er.tagvaddr = reinterpret_cast<uint64*>(actual);
ProcessTagError(&er, 0, "Hardware Error");
// C implementation of Adler memory copy, with memory tagging.
// Copies src to dst 64 bits at a time while folding the data into the
// four Adler accumulators (a1/a2, b1/b2 — two interleaved streams) and
// verifying the per-cache-line address tags on both buffers.
// NOTE(review): interior lines are missing from this listing (srcmem64
// parameter, datacast declarations, accumulator init, the loop header, the
// b1/b2 updates and the final return) — compare against the complete
// source.
bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64,
unsigned int size_in_bytes,
AdlerChecksum *checksum,
struct page_entry *pe) {
// Use this data wrapper to access memory with 64bit read/write.
unsigned int count = size_in_bytes / sizeof(data);
if (count > ((1U) << 19)) {
// Size is too large, must be strictly less than 512 KB.
class Pattern *pattern = pe->pattern;
// Process 64 bits at a time.
// Every 8th word (cache-line start) holds an address tag, not data.
if ((i & 0x7) == 0) {
data.l64 = srcmem64[i];
dstdata.l64 = dstmem64[i];
uint64 src_tag = addr_to_tag(&srcmem64[i]);
uint64 dst_tag = addr_to_tag(&dstmem64[i]);
// Detect if tags have been corrupted.
if (data.l64 != src_tag)
ReportTagError(&srcmem64[i], data.l64, src_tag);
if (dstdata.l64 != dst_tag)
ReportTagError(&dstmem64[i], dstdata.l64, dst_tag);
// Checksum the pattern value the tag word replaced, not the tag itself.
data.l32.l = pattern->pattern(i << 1);
data.l32.h = pattern->pattern((i << 1) + 1);
a1 = a1 + data.l32.l;
a1 = a1 + data.l32.h;
dstmem64[i] = data.l64;
data.l64 = srcmem64[i];
a1 = a1 + data.l32.l;
a1 = a1 + data.l32.h;
dstmem64[i] = data.l64;
data.l64 = srcmem64[i];
a2 = a2 + data.l32.l;
a2 = a2 + data.l32.h;
dstmem64[i] = data.l64;
checksum->Set(a1, a2, b1, b2);
// C implementation of Adler memory crc.
// Read-only counterpart of AdlerAddrMemcpyC: folds the source buffer into
// the Adler accumulators and verifies per-cache-line address tags, without
// writing anywhere.
// NOTE(review): interior lines are missing from this listing (datacast
// declaration, accumulator init, the loop header, the b1/b2 updates and
// the final return) — compare against the complete source.
bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
unsigned int size_in_bytes,
AdlerChecksum *checksum,
struct page_entry *pe) {
// Use this data wrapper to access memory with 64bit read/write.
unsigned int count = size_in_bytes / sizeof(data);
if (count > ((1U) << 19)) {
// Size is too large, must be strictly less than 512 KB.
class Pattern *pattern = pe->pattern;
// Process 64 bits at a time.
// Every 8th word (cache-line start) holds an address tag, not data.
if ((i & 0x7) == 0) {
data.l64 = srcmem64[i];
uint64 src_tag = addr_to_tag(&srcmem64[i]);
// Check that tags match expected.
if (data.l64 != src_tag)
ReportTagError(&srcmem64[i], data.l64, src_tag);
// Checksum the pattern value the tag word replaced, not the tag itself.
data.l32.l = pattern->pattern(i << 1);
data.l32.h = pattern->pattern((i << 1) + 1);
a1 = a1 + data.l32.l;
a1 = a1 + data.l32.h;
data.l64 = srcmem64[i];
a1 = a1 + data.l32.l;
a1 = a1 + data.l32.h;
data.l64 = srcmem64[i];
a2 = a2 + data.l32.l;
a2 = a2 + data.l32.h;
checksum->Set(a1, a2, b1, b2);
// Copy a block of memory quickly, while keeping a CRC of the data.
// Result check if the CRC mismatches.
// On CRC mismatch: slow-compare the source; if clean, copy the data back
// from the destination and compare again; if still clean, report a
// hardware error on the first word as a last resort.
// NOTE(review): interior lines are missing from this listing (errors
// declaration, the tag_mode_ if/else around the two memcpy variants,
// CheckRegion argument lists, braces, the leftover-copy guard and the
// final FillPage/return) — compare against the complete source.
int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
struct page_entry *srcpe) {
const int blocksize = 4096;
const int blockwords = blocksize / wordsize_;
int blocks = sat_->page_length() / blocksize;
// Base addresses for memory copy
uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
// Remember the expected CRC
const AdlerChecksum *expectedcrc = srcpe->pattern->crc();
for (int currentblock = 0; currentblock < blocks; currentblock++) {
uint64 *targetmem = targetmembase + currentblock * blockwords;
uint64 *sourcemem = sourcemembase + currentblock * blockwords;
// Tag-aware copy path.
AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
// Plain copy path.
AdlerMemcpyC(targetmem, sourcemem, blocksize, &crc);
// Investigate miscompares.
if (!crc.Equals(*expectedcrc)) {
logprintf(11, "Log: CrcCopyPage Falling through to slow compare, "
"CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
int errorcount = CheckRegion(sourcemem,
currentblock * blocksize, 0);
if (errorcount == 0) {
logprintf(0, "Log: CrcCopyPage CRC mismatch %s != %s, "
"but no miscompares found. Retrying with fresh data.\n",
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
// Copy the data originally read from this region back again.
// This data should have any corruption read originally while
// calculating the CRC.
memcpy(sourcemem, targetmem, blocksize);
errorcount = CheckRegion(sourcemem,
currentblock * blocksize, 0);
if (errorcount == 0) {
int apic_id = apicid();
uint32 cpumask = CurrentCpus();
logprintf(0, "Process Error: CPU %d(0x%x) CrcCopyPage "
"CRC mismatch %s != %s, "
"but no miscompares found on second pass.\n",
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
struct ErrorRecord er;
er.actual = sourcemem[0];
er.vaddr = sourcemem;
ProcessError(&er, 0, "Hardware Error");
errors += errorcount;
// For odd length transfers, we should never hit this.
int leftovers = sat_->page_length() % blocksize;
uint64 *targetmem = targetmembase + blocks * blockwords;
uint64 *sourcemem = sourcemembase + blocks * blockwords;
errors += CheckRegion(sourcemem,
blocks * blocksize, 0);
int leftoverwords = leftovers / wordsize_;
for (int i = 0; i < leftoverwords; i++) {
targetmem[i] = sourcemem[i];
// Update pattern reference to reflect new contents.
dstpe->pattern = srcpe->pattern;
// Clean clean clean the errors away.
// TODO(nsanders): Maybe we should patch rather than fill? Filling may
// cause bad data to be propagated across the page.
1254 // Invert a block of memory quickly, traversing downwards.
1255 int InvertThread::InvertPageDown(struct page_entry *srcpe) {
1256 const int blocksize = 4096;
1257 const int blockwords = blocksize / wordsize_;
1258 int blocks = sat_->page_length() / blocksize;
1260 // Base addresses for memory copy
1261 unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);
1263 for (int currentblock = blocks-1; currentblock >= 0; currentblock--) {
1264 unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
1265 for (int i = blockwords - 32; i >= 0; i -= 32) {
1266 for (int index = i + 31; index >= i; --index) {
1267 unsigned int actual = sourcemem[index];
1268 sourcemem[index] = ~actual;
1270 OsLayer::FastFlush(&sourcemem[i]);
1277 // Invert a block of memory, traversing upwards.
1278 int InvertThread::InvertPageUp(struct page_entry *srcpe) {
1279 const int blocksize = 4096;
1280 const int blockwords = blocksize / wordsize_;
1281 int blocks = sat_->page_length() / blocksize;
1283 // Base addresses for memory copy
1284 unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);
1286 for (int currentblock = 0; currentblock < blocks; currentblock++) {
1287 unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
1288 for (int i = 0; i < blockwords; i += 32) {
1289 for (int index = i; index <= i + 31; ++index) {
1290 unsigned int actual = sourcemem[index];
1291 sourcemem[index] = ~actual;
1293 OsLayer::FastFlush(&sourcemem[i]);
1299 // Copy a block of memory quickly, while keeping a CRC of the data.
1300 // Result check if the CRC mismatches. Warm the CPU while running
// Copies srcpe -> dstpe one 4KB block at a time via the OS-layer
// CPU-warming Adler memcpy, comparing the running checksum against the
// CRC recorded in the source page's pattern. On mismatch it falls back
// to a word-by-word CheckRegion to localize the miscompare, and retries
// with re-copied data to distinguish memory errors from CRC-path errors.
// NOTE(review): several lines (crc declaration, brace closures, error
// tallying, final fill) are elided in this listing — verify against the
// full source before modifying.
1301 int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
1302 struct page_entry *srcpe) {
1304 const int blocksize = 4096;
1305 const int blockwords = blocksize / wordsize_;
1306 int blocks = sat_->page_length() / blocksize;
1308 // Base addresses for memory copy
1309 uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
1310 uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
1311 // Remember the expected CRC
1312 const AdlerChecksum *expectedcrc = srcpe->pattern->crc();
1314 for (int currentblock = 0; currentblock < blocks; currentblock++) {
1315 uint64 *targetmem = targetmembase + currentblock * blockwords;
1316 uint64 *sourcemem = sourcemembase + currentblock * blockwords;
// Two copy paths: portable C checksum-copy vs. the platform's
// CPU-warming assembly copy (selection condition elided above).
1320 AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
1322 os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc);
1325 // Investigate miscompares.
1326 if (!crc.Equals(*expectedcrc)) {
1327 logprintf(11, "Log: CrcWarmCopyPage Falling through to slow compare, "
1328 "CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
1329 expectedcrc->ToHexString().c_str());
1330 int errorcount = CheckRegion(sourcemem,
1333 currentblock * blocksize, 0);
1334 if (errorcount == 0) {
1335 logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, "
1336 "but no miscompares found. Retrying with fresh data.\n",
1337 crc.ToHexString().c_str(),
1338 expectedcrc->ToHexString().c_str());
1340 // Copy the data originally read from this region back again.
1341 // This data should have any corruption read originally while
1342 // calculating the CRC.
1343 memcpy(sourcemem, targetmem, blocksize);
1344 errorcount = CheckRegion(sourcemem,
1347 currentblock * blocksize, 0);
1348 if (errorcount == 0) {
// CRC said mismatch but no word-level miscompare found twice:
// likely a transient CPU/checksum error, so report process error.
1349 logprintf(0, "Process Error: CrcWarmCopyPage CRC mismatch %s "
1350 "!= %s, but no miscompares found on second pass.\n",
1351 crc.ToHexString().c_str(),
1352 expectedcrc->ToHexString().c_str());
1356 errors += errorcount;
// Handle any partial trailing block ("leftovers") with a plain
// word copy after verifying it.
1360 // For odd length transfers, we should never hit this.
1361 int leftovers = sat_->page_length() % blocksize;
1363 uint64 *targetmem = targetmembase + blocks * blockwords;
1364 uint64 *sourcemem = sourcemembase + blocks * blockwords;
1366 errors += CheckRegion(sourcemem,
1369 blocks * blocksize, 0);
1370 int leftoverwords = leftovers / wordsize_;
1371 for (int i = 0; i < leftoverwords; i++) {
1372 targetmem[i] = sourcemem[i];
1376 // Update pattern reference to reflect new contents.
1377 dstpe->pattern = srcpe->pattern;
1379 // Clean clean clean the errors away.
1381 // TODO(nsanders): Maybe we should patch rather than fill? Filling may
1382 // cause bad data to be propogated across the page.
1390 // Memory check work loop. Execute until done, then exhaust pages.
// Repeatedly pops a valid page, verifies its contents (check call elided
// in this listing), and returns the page to the valid queue while the
// test is still running, or to the empty queue during shutdown so the
// queues drain cleanly. Returns thread status; counts loops in
// pages_copied_.
1391 int CheckThread::Work() {
1392 struct page_entry pe;
1396 logprintf(9, "Log: Starting Check thread %d\n", thread_num_);
1398 // We want to check all the pages, and
1399 // stop when there aren't any left.
1401 result &= sat_->GetValid(&pe);
// A pop failure is only an error if we were not asked to stop:
// during shutdown an empty queue is the expected exit condition.
1403 if (IsReadyToRunNoPause())
1404 logprintf(0, "Process Error: check_thread failed to pop pages, "
1411 // Do the result check.
1414 // Push pages back on the valid queue if we are still going,
1415 // throw them out otherwise.
1416 if (IsReadyToRunNoPause())
1417 result &= sat_->PutValid(&pe);
1419 result &= sat_->PutEmpty(&pe);
1421 logprintf(0, "Process Error: check_thread failed to push pages, "
1428 pages_copied_ = loops;
1430 logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n",
1431 thread_num_, status_, pages_copied_);
1436 // Memory copy work loop. Execute until marked done.
// Pops a valid (pattern-filled) page and an empty page, copies
// valid -> empty (with CRC verification when warm/strict modes are on),
// then swaps their queue roles: the destination becomes valid, the
// source becomes empty. Optionally injects a one-byte error for
// unittests. Returns thread status; pages_copied_ = loop count.
1437 int CopyThread::Work() {
1438 struct page_entry src;
1439 struct page_entry dst;
1443 logprintf(9, "Log: Starting copy thread %d: cpu %x, mem %x\n",
1444 thread_num_, cpu_mask_, tag_);
1446 while (IsReadyToRun()) {
1447 // Pop the needed pages.
1448 result &= sat_->GetValid(&src, tag_);
1449 result &= sat_->GetEmpty(&dst, tag_);
1451 logprintf(0, "Process Error: copy_thread failed to pop pages, "
1456 // Force errors for unittests.
1457 if (sat_->error_injection()) {
// Corrupt one pseudo-random byte of the source page so downstream
// checkers have something to find (probability gate elided).
1459 char *addr = reinterpret_cast<char*>(src.addr);
1460 int offset = random() % sat_->page_length();
1461 addr[offset] = 0xba;
1465 // We can use memcpy, or CRC check while we copy.
1467 CrcWarmCopyPage(&dst, &src);
1468 } else if (sat_->strict()) {
1469 CrcCopyPage(&dst, &src);
1471 memcpy(dst.addr, src.addr, sat_->page_length());
// Plain memcpy does not verify, so carry the pattern tag manually.
1472 dst.pattern = src.pattern;
1475 result &= sat_->PutValid(&dst);
1476 result &= sat_->PutEmpty(&src);
1478 // Copy worker-threads yield themselves at the end of each copy loop,
1479 // to avoid threads from preempting each other in the middle of the inner
1480 // copy-loop. Cooperations between Copy worker-threads results in less
1481 // unnecessary cache thrashing (which happens when context-switching in the
1482 // middle of the inner copy-loop).
1486 logprintf(0, "Process Error: copy_thread failed to push pages, "
1493 pages_copied_ = loops;
1495 logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
1496 thread_num_, status_, pages_copied_);
1500 // Memory invert work loop. Execute until marked done.
// Pops a valid page and inverts its contents in place (an even number of
// inversions restores the original pattern, so the page can be returned
// to the valid queue unchanged logically). Counts two "pages" per loop.
1501 int InvertThread::Work() {
1502 struct page_entry src;
1506 logprintf(9, "Log: Starting invert thread %d\n", thread_num_);
1508 while (IsReadyToRun()) {
1509 // Pop the needed pages.
1510 result &= sat_->GetValid(&src);
1512 logprintf(0, "Process Error: invert_thread failed to pop pages, "
1520 // For the same reason CopyThread yields itself (see YieldSelf comment
1521 // in CopyThread::Work(), InvertThread yields itself after each invert
1522 // operation to improve cooperation between different worker threads
1523 // stressing the memory/cache.
// NOTE(review): this listing shows two InvertPageDown calls; the elided
// lines presumably alternate with InvertPageUp — confirm in full source.
1526 InvertPageDown(&src);
1528 InvertPageDown(&src);
1536 result &= sat_->PutValid(&src);
1538 logprintf(0, "Process Error: invert_thread failed to push pages, "
1545 pages_copied_ = loops * 2;
// NOTE(review): message says "Copy thread" but this is the invert
// thread — looks like a copy/paste slip in the log text; confirm and fix.
1547 logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
1548 thread_num_, status_, pages_copied_);
1553 // Set file name to use for File IO.
// Records the target filename and resolves which block device backs it
// (used later for per-device error reporting).
1554 void FileThread::SetFile(const char *filename_init) {
1555 filename_ = filename_init;
1556 devicename_ = os_->FindFileDevice(filename_);
1559 // Open the file for access.
// Opens (creating if needed) the test file with O_SYNC|O_DIRECT so
// writes hit the device rather than the page cache; on success stores
// the descriptor in *pfile (success path elided in this listing).
1560 bool FileThread::OpenFile(int *pfile) {
1561 int fd = open(filename_.c_str(),
1562 O_RDWR | O_CREAT | O_SYNC | O_DIRECT,
1565 logprintf(0, "Process Error: Failed to create file %s!!\n",
// Close the file descriptor opened by OpenFile (body elided).
1576 bool FileThread::CloseFile(int fd) {
1581 // Check sector tagging.
// Stamps every 512-byte sector of the page with a SectorTag header:
// a per-thread magic byte, the block index, the sector index within the
// page, and the current pass number. SectorValidatePage later verifies
// these to detect sectors that were written/read out of place.
1582 bool FileThread::SectorTagPage(struct page_entry *src, int block) {
1583 int page_length = sat_->page_length();
1584 struct FileThread::SectorTag *tag =
1585 (struct FileThread::SectorTag *)(src->addr);
// Magic is derived from thread_num_ so tags from different file
// threads are distinguishable; all fields are truncated to one byte.
1588 unsigned char magic = ((0xba + thread_num_) & 0xff);
1589 for (int sec = 0; sec < page_length / 512; sec++) {
1590 tag[sec].magic = magic;
1591 tag[sec].block = block & 0xff;
1592 tag[sec].sector = sec & 0xff;
1593 tag[sec].pass = pass_ & 0xff;
// Write one page of data from src->addr to the file at the current
// offset. Reports a block error against the backing device and logs if
// the write is short or fails (error return elided in this listing).
1598 bool FileThread::WritePageToFile(int fd, struct page_entry *src) {
1599 int page_length = sat_->page_length();
1600 // Fill the file with our data.
1601 int64 size = write(fd, src->addr, page_length);
1603 if (size != page_length) {
1604 os_->ErrorReport(devicename_.c_str(), "write-error", 1);
1606 logprintf(0, "Block Error: file_thread failed to write, "
1613 // Write the data to the file.
// Writes sat_->disk_pages() valid pages sequentially from offset 0,
// recording each page's expected pattern and source address in
// page_recs_ so ReadPages can verify them later. In non-strict mode the
// pages are additionally sector-tagged before writing.
1614 bool FileThread::WritePages(int fd) {
1615 int strict = sat_->strict();
1617 // Start fresh at beginning of file for each batch of pages.
1618 lseek(fd, 0, SEEK_SET);
1619 for (int i = 0; i < sat_->disk_pages(); i++) {
1620 struct page_entry src;
1621 if (!GetValidPage(&src))
1623 // Save expected pattern.
1624 page_recs_[i].pattern = src.pattern;
1625 page_recs_[i].src = src.addr;
1627 // Check data correctness.
// (Strict-mode CRC pre-check elided in this listing.)
1631 SectorTagPage(&src, i);
1633 bool result = WritePageToFile(fd, &src);
// Return the page to the empty queue regardless of write outcome.
1635 if (!PutEmptyPage(&src))
1644 // Copy data from file into memory block.
// Reads one page from the file at the current offset into dst->addr.
// Short reads and failures are reported as block errors against the
// backing device (error return elided in this listing).
1645 bool FileThread::ReadPageFromFile(int fd, struct page_entry *dst) {
1646 int page_length = sat_->page_length();
1648 // Do the actual read.
1649 int64 size = read(fd, dst->addr, page_length);
1650 if (size != page_length) {
1651 os_->ErrorReport(devicename_.c_str(), "read-error", 1);
1652 logprintf(0, "Block Error: file_thread failed to read, "
1660 // Check sector tagging.
// Verifies the per-sector tags written by SectorTagPage: each 512-byte
// sector must carry the expected magic, block, sector, and pass values.
// Mismatches are routed to the HDD sector-tag diagnoser and logged; the
// tag bytes are then patched back to the page's data pattern so normal
// CRC checking can proceed. Returns false on stop_on_error (elided).
1661 bool FileThread::SectorValidatePage(const struct PageRec &page,
1662 struct page_entry *dst, int block) {
1664 static int calls = 0;
1667 // Do sector tag compare.
1668 int firstsector = -1;
1669 int lastsector = -1;
1670 bool badsector = false;
1671 int page_length = sat_->page_length();
1673 // Cast data block into an array of tagged sectors.
1674 struct FileThread::SectorTag *tag =
1675 (struct FileThread::SectorTag *)(dst->addr);
// The tag struct must be exactly one sector wide for this cast to work.
1677 sat_assert(sizeof(*tag) == 512);
1680 if (sat_->error_injection()) {
// Deliberately corrupt a run of sector tags and one data word so the
// detection paths below are exercised (call-count gating elided).
1682 for (int badsec = 8; badsec < 17; badsec++)
1683 tag[badsec].pass = 27;
1686 (static_cast<int32*>(dst->addr))[27] = 0xbadda7a;
1690 // Check each sector for the correct tag we added earlier,
1691 // then revert the tag to the to normal data pattern.
1692 unsigned char magic = ((0xba + thread_num_) & 0xff);
1693 for (int sec = 0; sec < page_length / 512; sec++) {
1695 if ((tag[sec].magic != magic) ||
1696 (tag[sec].block != (block & 0xff)) ||
1697 (tag[sec].sector != (sec & 0xff)) ||
1698 (tag[sec].pass != (pass_ & 0xff))) {
1699 // Offset calculation for tag location.
// Point at the first differing tag byte: magic=+0, block=+1,
// sector=+2, pass=+3.
1700 int offset = sec * sizeof(SectorTag);
1701 if (tag[sec].block != (block & 0xff))
1702 offset += 1 * sizeof(uint8);
1703 else if (tag[sec].sector != (sec & 0xff))
1704 offset += 2 * sizeof(uint8);
1705 else if (tag[sec].pass != (pass_ & 0xff))
1706 offset += 3 * sizeof(uint8);
1708 // Run sector tag error through diagnoser for logging and reporting.
1710 os_->error_diagnoser_->AddHDDSectorTagError(devicename_, tag[sec].block,
1713 page.src, page.dst);
1715 logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. "
1716 "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n",
1717 block * page_length + 512 * sec,
1718 (pass_ & 0xff), (unsigned int)tag[sec].pass,
1719 sec, (unsigned int)tag[sec].sector,
1720 block, (unsigned int)tag[sec].block,
1721 magic, (unsigned int)tag[sec].magic,
1724 // Keep track of first and last bad sector.
1725 if (firstsector == -1)
1726 firstsector = (block * page_length / 512) + sec;
1727 lastsector = (block * page_length / 512) + sec;
1730 // Patch tag back to proper pattern.
1731 unsigned int *addr = (unsigned int *)(&tag[sec]);
1732 *addr = dst->pattern->pattern(512 * sec / sizeof(*addr));
1735 // If we found sector errors:
1736 if (badsector == true) {
1737 logprintf(5, "Log: file sector miscompare at offset %x-%x. File: %s\n",
1739 ((lastsector + 1) * 512) - 1,
1742 // Either exit immediately, or patch the data up and continue.
1743 if (sat_->stop_on_error()) {
1746 // Patch up bad pages.
// Refill every page touched by a bad sector with the expected
// pattern so subsequent checks start from known-good data.
1747 for (int block = (firstsector * 512) / page_length;
1748 block <= (lastsector * 512) / page_length;
1750 unsigned int *memblock = static_cast<unsigned int *>(dst->addr);
1751 int length = page_length / wordsize_;
1752 for (int i = 0; i < length; i++) {
1753 memblock[i] = dst->pattern->pattern(i);
1761 // Get memory for an incoming data transfer..
// Decides whether direct I/O can target SAT pages directly (normal
// memory) or whether a 512-byte-aligned bounce buffer must be allocated
// with posix_memalign (O_DIRECT requires sector alignment).
1762 bool FileThread::PagePrepare() {
1763 // We can only do direct IO to SAT pages if it is normal mem.
1764 page_io_ = os_->normal_mem();
1766 // Init a local buffer if we need it.
1768 int result = posix_memalign(&local_page_, 512, sat_->page_length());
1770 logprintf(0, "Process Error: disk thread posix_memalign "
1771 "returned %d (fail)\n",
1781 // Remove memory allocated for data transfer.
// Frees the bounce buffer allocated by PagePrepare, if any.
1782 bool FileThread::PageTeardown() {
1783 // Free a local buffer if we need to.
1792 // Get memory for an incoming data transfer..
// Pops an empty page; when direct page I/O is unavailable, redirects
// the entry's address to the local bounce buffer.
1793 bool FileThread::GetEmptyPage(struct page_entry *dst) {
1795 if (!sat_->GetEmpty(dst))
1798 dst->addr = local_page_;
1805 // Get memory for an outgoing data transfer..
// Pops a valid page; in bounce-buffer mode the data is CRC-copied into
// the local buffer and the original page is immediately returned to the
// valid queue.
1806 bool FileThread::GetValidPage(struct page_entry *src) {
1807 struct page_entry tmp;
1808 if (!sat_->GetValid(&tmp))
1814 src->addr = local_page_;
1816 CrcCopyPage(src, &tmp);
1817 if (!sat_->PutValid(&tmp))
1824 // Throw out a used empty page.
// Returns a page to the empty queue (bounce-buffer bookkeeping elided).
1825 bool FileThread::PutEmptyPage(struct page_entry *src) {
1827 if (!sat_->PutEmpty(src))
1833 // Throw out a used, filled page.
// Returns a page to the valid queue (bounce-buffer bookkeeping elided).
1834 bool FileThread::PutValidPage(struct page_entry *src) {
1836 if (!sat_->PutValid(src))
1844 // Copy data from file into memory blocks.
// Reads back the pages written by WritePages, restoring each page's
// expected pattern from page_recs_, optionally validating sector tags
// (non-strict mode), then CRC-checking the full page contents. Any
// miscompare is logged with its file offset range.
1845 bool FileThread::ReadPages(int fd) {
1846 int page_length = sat_->page_length();
1847 int strict = sat_->strict();
1851 // Read our data back out of the file, into it's new location.
1852 lseek(fd, 0, SEEK_SET);
1853 for (int i = 0; i < sat_->disk_pages(); i++) {
1854 struct page_entry dst;
1855 if (!GetEmptyPage(&dst))
1857 // Retrieve expected pattern.
1858 dst.pattern = page_recs_[i].pattern;
1859 // Update page recordpage record.
1860 page_recs_[i].dst = dst.addr;
1862 // Read from the file into destination page.
1863 if (!ReadPageFromFile(fd, &dst)) {
1868 SectorValidatePage(page_recs_[i], &dst, i);
1870 // Ensure that the transfer ended up with correct data.
1872 // Record page index currently CRC checked.
1874 int errors = CrcCheckPage(&dst);
1876 logprintf(5, "Log: file miscompare at block %d, "
1877 "offset %x-%x. File: %s\n",
1878 i, i * page_length, ((i + 1) * page_length) - 1,
1883 errorcount_ += errors;
// Page has been verified; push it back as a valid page.
1885 if (!PutValidPage(&dst))
1892 // File IO work loop. Execute until marked done.
// Top-level loop for the file stress thread: open the data file,
// allocate per-page expectation records, then alternate WritePages /
// ReadPages batches until stopped. pages_copied_ counts
// loops * disk_pages.
1893 int FileThread::Work() {
1898 logprintf(9, "Log: Starting file thread %d, file %s, device %s\n",
1901 devicename_.c_str());
1906 // Open the data IO file.
1913 // Load patterns into page records.
// NOTE(review): each PageRec gets a fresh Pattern here; the matching
// delete is not visible in this excerpt — confirm cleanup in full source.
1914 page_recs_ = new struct PageRec[sat_->disk_pages()];
1915 for (int i = 0; i < sat_->disk_pages(); i++) {
1916 page_recs_[i].pattern = new struct Pattern();
1920 while (IsReadyToRun()) {
1921 // Do the file write.
1922 if (!(fileresult &= WritePages(fd)))
1925 // Do the file read.
1926 if (!(fileresult &= ReadPages(fd)))
1933 pages_copied_ = loops * sat_->disk_pages();
1940 logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n",
1941 thread_num_, status_, pages_copied_);
// Master-side stop check: stop as soon as the run is over (no pause
// grace period).
1945 bool NetworkThread::IsNetworkStopSet() {
1946 return !IsReadyToRunNoPause();
// Slave-side stop check: the slave has no completion status of its own;
// it runs until its peer stops sending (body elided in this listing).
1949 bool NetworkSlaveThread::IsNetworkStopSet() {
1950 // This thread has no completion status.
1951 // It finishes whever there is no more data to be
1956 // Set ip name to use for Network IO.
// Stores the peer address string (dotted quad) for later Connect().
// NOTE(review): strncpy with a count of 256 does not NUL-terminate if
// the input is 256+ chars — verify ipaddr_ sizing/termination upstream.
1957 void NetworkThread::SetIP(const char *ipaddr_init) {
1958 strncpy(ipaddr_, ipaddr_init, 256);
1962 // Return 0 on error.
// Creates a TCP socket and stores it in *psocket (success path elided).
1963 bool NetworkThread::CreateSocket(int *psocket) {
1964 int sock = socket(AF_INET, SOCK_STREAM, 0);
1966 logprintf(0, "Process Error: Cannot open socket\n");
1975 // Close the socket.
1976 bool NetworkThread::CloseSocket(int sock) {
1981 // Initiate the tcp connection.
// Connects `sock` to ipaddr_ on the fixed test port kNetworkPort.
// Returns false (elided) if the address cannot be parsed or the TCP
// connect fails.
1982 bool NetworkThread::Connect(int sock) {
1983 struct sockaddr_in dest_addr;
1984 dest_addr.sin_family = AF_INET;
1985 dest_addr.sin_port = htons(kNetworkPort);
1986 memset(&(dest_addr.sin_zero), '\0', sizeof(dest_addr.sin_zero));
1988 // Translate dot notation to u32.
1989 if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) {
1990 logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_);
1996 if (-1 == connect(sock, reinterpret_cast<struct sockaddr *>(&dest_addr),
1997 sizeof(struct sockaddr))) {
1998 logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_);
2006 // Initiate the tcp connection.
// Binds sock_ to INADDR_ANY:kNetworkPort and (elided) puts it in the
// listening state so traffic-generator threads can connect.
2007 bool NetworkListenThread::Listen() {
2008 struct sockaddr_in sa;
2010 memset(&(sa.sin_zero), '\0', sizeof(sa.sin_zero));
2012 sa.sin_family = AF_INET;
2013 sa.sin_addr.s_addr = INADDR_ANY;
2014 sa.sin_port = htons(kNetworkPort);
2016 if (-1 == bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) {
2018 sat_strerror(errno, buf, sizeof(buf));
2019 logprintf(0, "Process Error: Cannot bind socket: %s\n", buf);
2028 // Wait for a connection from a network traffic generation thread.
// select()s on sock_ with a timeout so the accept loop can periodically
// re-check the stop flag; returns true when a connection is pending.
2029 bool NetworkListenThread::Wait() {
2034 // Watch sock_ to see when it has input.
2036 FD_SET(sock_, &rfds);
2037 // Wait up to five seconds.
2041 retval = select(sock_ + 1, &rfds, NULL, NULL, &tv);
2043 return (retval > 0);
2046 // Wait for a connection from a network traffic generation thread.
// accept()s a pending connection and returns the new socket via
// *pnewsock.
2047 bool NetworkListenThread::GetConnection(int *pnewsock) {
2048 struct sockaddr_in sa;
2049 socklen_t size = sizeof(struct sockaddr_in);
2051 int newsock = accept(sock_, reinterpret_cast<struct sockaddr *>(&sa), &size);
2053 logprintf(0, "Process Error: Did not receive connection\n");
2058 *pnewsock = newsock;
// Sends one full page over `sock`, looping on partial send()s until all
// page_length bytes have gone out. A zero/-1 return from send() is only
// treated as an error if the test is not already stopping.
2062 bool NetworkThread::SendPage(int sock, struct page_entry *src) {
2063 int page_length = sat_->page_length();
2064 char *address = static_cast<char*>(src->addr);
2066 // Send our data over the network.
2067 int size = page_length;
// `size` counts bytes still to send; (page_length - size) is the
// offset of the next unsent byte.
2069 int transferred = send(sock, address + (page_length - size), size, 0);
2070 if ((transferred == 0) || (transferred == -1)) {
2071 if (!IsNetworkStopSet()) {
2073 sat_strerror(errno, buf, sizeof(buf));
2074 logprintf(0, "Process Error: Thread %d, "
2075 "Network write failed, bailing. (%s)\n",
2080 size = size - transferred;
// Receives one full page into dst->addr, looping on partial recv()s.
// On a zero/-1 return: during shutdown a clean EOF (transferred==0,
// errno==0) is tolerated with a log message, since the two ends of the
// connection will not stop in perfect sync; any other failure is logged
// in detail, including a dump of short suspicious payloads.
2086 bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
2087 int page_length = sat_->page_length();
2088 char *address = static_cast<char*>(dst->addr);
2090 // Maybe we will get our data back again, maybe not.
2091 int size = page_length;
2093 int transferred = recv(sock, address + (page_length - size), size, 0);
2094 if ((transferred == 0) || (transferred == -1)) {
2095 // Typically network slave thread should exit as network master
2096 // thread stops sending data.
2097 if (IsNetworkStopSet()) {
2099 if (transferred == 0 && err == 0) {
2100 // Two system setups will not sync exactly,
2101 // allow early exit, but log it.
2102 logprintf(0, "Log: Net thread did not receive any data, exiting.\n");
2105 sat_strerror(err, buf, sizeof(buf));
2106 // Print why we failed.
2107 logprintf(0, "Process Error: Thread %d, "
2108 "Network read failed, bailing (%s).\n",
2110 // Print arguments and results.
2111 logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n",
2112 sock, address + (page_length - size),
2113 size, transferred, err);
2114 if ((transferred == 0) &&
2115 (page_length - size < 512) &&
2116 (page_length - size > 0)) {
2117 // Print null terminated data received, to see who's been
2118 // sending us supicious unwanted data.
// NOTE(review): this overwrites one received byte with NUL to make
// the buffer printable — acceptable only on this failure path.
2119 address[page_length - size] = 0;
2120 logprintf(0, "Log: received %d bytes: '%s'\n",
2121 page_length - size, address);
2127 size = size - transferred;
2133 // Network IO work loop. Execute until marked done.
// Master side of the network stress pair: connect to a slave, then loop
// sending valid pages and receiving them reflected back into empty
// pages, verifying (strict mode, elided) that the round trip preserved
// the data. Queue roles are swapped each iteration as in CopyThread.
2134 int NetworkThread::Work() {
2135 logprintf(9, "Log: Starting network thread %d, ip %s\n",
2141 if (!CreateSocket(&sock))
2144 // Network IO loop requires network slave thread to have already initialized.
2145 // We will sleep here for awhile to ensure that the slave thread will be
2146 // listening by the time we connect.
2147 // Sleep for 15 seconds.
// NOTE(review): fixed sleep is a race-avoidance heuristic, not a
// guarantee — slave startup slower than 15s would still fail connect.
2149 logprintf(9, "Log: Starting execution of network thread %d, ip %s\n",
2154 // Connect to a slave thread.
2160 int strict = sat_->strict();
2162 while (IsReadyToRun()) {
2163 struct page_entry src;
2164 struct page_entry dst;
2165 result &= sat_->GetValid(&src);
2166 result &= sat_->GetEmpty(&dst);
2168 logprintf(0, "Process Error: net_thread failed to pop pages, "
2173 // Check data correctness.
2177 // Do the network write.
2178 if (!(result &= SendPage(sock, &src)))
2181 // Update pattern reference to reflect new contents.
2182 dst.pattern = src.pattern;
2184 // Do the network read.
2185 if (!(result &= ReceivePage(sock, &dst)))
2188 // Ensure that the transfer ended up with correct data.
2192 // Return all of our pages to the queue.
2193 result &= sat_->PutValid(&dst);
2194 result &= sat_->PutEmpty(&src);
2196 logprintf(0, "Process Error: net_thread failed to push pages, "
2203 pages_copied_ = loops;
2209 logprintf(9, "Log: Completed %d: network thread status %d, "
2210 "%d pages copied\n",
2211 thread_num_, status_, pages_copied_);
2215 // Spawn slave threads for incoming connections.
// Creates and starts one NetworkSlaveThread bound to the freshly
// accepted socket, tracking it in child_workers_ for later reaping.
// NOTE(review): `new ChildWorker` ownership is released in
// NetworkListenThread::Work's cleanup loop — keep them paired.
2216 bool NetworkListenThread::SpawnSlave(int newsock, int threadid) {
2217 logprintf(12, "Log: Listen thread spawning slave\n");
2219 // Spawn slave thread, to reflect network traffic back to sender.
2220 ChildWorker *child_worker = new ChildWorker;
2221 child_worker->thread.SetSock(newsock);
2222 child_worker->thread.InitThread(threadid, sat_, os_, patternlist_,
2223 &child_worker->status);
2224 child_worker->status.Initialize();
2225 child_worker->thread.SpawnThread();
2226 child_workers_.push_back(child_worker);
2231 // Reap slave threads.
// Joins every spawned slave, folding each one's status, error count and
// page count into this thread's totals.
2232 bool NetworkListenThread::ReapSlaves() {
2234 // Gather status and reap threads.
2235 logprintf(12, "Log: Joining all outstanding threads\n");
// NOTE(review): `int i` vs the unsigned size() return is a
// signed/unsigned comparison — harmless here but worth a size_t.
2237 for (int i = 0; i < child_workers_.size(); i++) {
2238 NetworkSlaveThread& child_thread = child_workers_[i]->thread;
2239 logprintf(12, "Log: Joining slave thread %d\n", i);
2240 child_thread.JoinThread();
2241 if (child_thread.GetStatus() != 1) {
2242 logprintf(0, "Process Error: Slave Thread %d failed with status %d\n", i,
2243 child_thread.GetStatus());
2246 errorcount_ += child_thread.GetErrorCount();
2247 logprintf(9, "Log: Slave Thread %d found %lld miscompares\n", i,
2248 child_thread.GetErrorCount());
2249 pages_copied_ += child_thread.GetPageCount();
2255 // Network listener IO work loop. Execute until marked done.
// Accept loop for the reflector side: create/bind/listen on sock_, then
// for each incoming connection spawn a slave thread that echoes pages
// back to the sender. On shutdown, joins all slaves via ReapSlaves and
// frees the ChildWorker records.
2256 int NetworkListenThread::Work() {
2258 logprintf(9, "Log: Starting network listen thread %d\n",
2263 if (!CreateSocket(&sock_))
2265 logprintf(9, "Log: Listen thread created sock\n");
2267 // Allows incoming connections to be queued up by socket library.
2270 logprintf(12, "Log: Listen thread waiting for incoming connections\n");
2272 // Wait on incoming connections, and spawn worker threads for them.
2273 int threadcount = 0;
2274 while (IsReadyToRun()) {
2275 // Poll for connections that we can accept().
// (Wait() call with its select() timeout elided in this listing.)
2277 // Accept those connections.
2278 logprintf(12, "Log: Listen thread found incoming connection\n");
2279 if (GetConnection(&newsock)) {
2280 SpawnSlave(newsock, threadcount);
2286 // Gather status and join spawned threads.
2289 // Delete the child workers.
2290 for (ChildVector::iterator it = child_workers_.begin();
2291 it != child_workers_.end(); ++it) {
2292 (*it)->status.Destroy();
// NOTE(review): the `delete *it` presumably sits in the elided line
// 2293/2294 — confirm the heap ChildWorkers are freed here.
2295 child_workers_.clear();
2301 "Log: Completed %d: network listen thread status %d, "
2302 "%d pages copied\n",
2303 thread_num_, status_, pages_copied_);
2307 // Set network reflector socket struct.
// Records the already-accepted socket this slave will serve (assignment
// elided in this listing).
2308 void NetworkSlaveThread::SetSock(int sock) {
2312 // Network reflector IO work loop. Execute until marked done.
// Echo loop: allocate one sector-aligned local page, then repeatedly
// receive a page from the master and send the same bytes straight back,
// until the peer stops. Reports no pass/fail of its own — data
// verification happens on the master side.
2313 int NetworkSlaveThread::Work() {
2314 logprintf(9, "Log: Starting network slave thread %d\n",
2317 // Verify that we have a socket.
2324 // Init a local buffer for storing data.
2325 void *local_page = NULL;
2326 int result = posix_memalign(&local_page, 512, sat_->page_length());
2328 logprintf(0, "Process Error: net slave posix_memalign "
2329 "returned %d (fail)\n",
2335 struct page_entry page;
2336 page.addr = local_page;
2338 // This thread will continue to run as long as the thread on the other end of
2339 // the socket is still sending and receiving data.
2341 // Do the network read.
2342 if (!ReceivePage(sock, &page))
2345 // Do the network write.
2346 if (!SendPage(sock, &page))
2352 pages_copied_ = loops;
2353 // No results provided from this type of thread.
// NOTE(review): matching free() of local_page is not visible in this
// excerpt — confirm the buffer is released before thread exit.
2360 "Log: Completed %d: network slave thread status %d, "
2361 "%d pages copied\n",
2362 thread_num_, status_, pages_copied_);
2366 // Thread work loop. Execute until marked finished.
// Polls the OS abstraction layer for machine-check / ECC style errors
// in a do/while loop until the run ends, accumulating the count.
2367 int ErrorPollThread::Work() {
2368 logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_);
2370 // This calls a generic error polling function in the Os abstraction layer.
2372 errorcount_ += os_->ErrorPoll();
// (Sleep between polls elided in this listing.)
2374 } while (IsReadyToRun());
2376 logprintf(9, "Log: Finished system error poll thread %d: %d errors\n",
2377 thread_num_, errorcount_);
2382 // Worker thread to heat up CPU.
// Spins the platform-specific CPU stress workload until the run ends;
// produces heat/power load rather than a checked result.
2383 int CpuStressThread::Work() {
2384 logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_);
2387 // Run ludloff's platform/CPU-specific assembly workload.
2388 os_->CpuStressWorkload();
2390 } while (IsReadyToRun());
2392 logprintf(9, "Log: Finished CPU stress thread %d:\n",
// Constructor: stores the shared cacheline array, its length, this
// thread's slot index within each cacheline struct, and how many
// increments to perform per batch.
2398 CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
2399 int cacheline_count,
2402 cc_cacheline_data_ = data;
2403 cc_cacheline_count_ = cacheline_count;
2404 cc_thread_num_ = thread_num;
2405 cc_inc_count_ = inc_count;
2408 // Worked thread to test the cache coherency of the CPUs
// Each thread owns one counter slot (num[cc_thread_num_]) in every
// cacheline struct. Per batch it does cc_inc_count_ increments spread
// pseudo-randomly across cachelines, then sums its slots across all
// cachelines: if coherency held, the sum must equal cc_inc_count_.
// Also reports the achieved increment rate at the end.
2409 int CpuCacheCoherencyThread::Work() {
2410 logprintf(9, "Log: Starting the Cache Coherency thread %d\n",
2412 uint64 time_start, time_end;
2415 unsigned int seed = static_cast<unsigned int>(gettid());
2416 gettimeofday(&tv, NULL); // Get the timestamp before increments.
2417 time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
2419 uint64 total_inc = 0; // Total increments done by the thread.
2420 while (IsReadyToRun()) {
2421 for (int i = 0; i < cc_inc_count_; i++) {
2422 // Choose a datastructure in random and increment the appropriate
2423 // member in that according to the offset (which is the same as the
2425 int r = rand_r(&seed);
// Scale r into [0, cc_cacheline_count_) via floating point; the
// /(RAND_MAX+1.0) form avoids modulo bias and out-of-range results.
2426 r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
2427 // Increment the member of the randomely selected structure.
2428 (cc_cacheline_data_[r].num[cc_thread_num_])++;
2431 total_inc += cc_inc_count_;
2433 // Calculate if the local counter matches with the global value
2434 // in all the cache line structures for this particular thread.
2435 int cc_global_num = 0;
2436 for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
2437 cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
2438 // Reset the cachline member's value for the next run.
2439 cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
// Error injection deliberately perturbs the expected sum (elided).
2441 if (sat_->error_injection())
2444 if (cc_global_num != cc_inc_count_) {
2446 logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
2447 cc_global_num, cc_inc_count_);
2450 gettimeofday(&tv, NULL); // Get the timestamp at the end.
2451 time_end = tv.tv_sec * 1000000ULL + tv.tv_usec;
2453 uint64 us_elapsed = time_end - time_start;
2454 // inc_rate is the no. of increments per second.
2455 double inc_rate = total_inc * 1e6 / us_elapsed;
2457 logprintf(4, "Stats: CC Thread(%d): Time=%llu us,"
2458 " Increments=%llu, Increments/sec = %.6lf\n",
2459 cc_thread_num_, us_elapsed, total_inc, inc_rate);
2460 logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n",
// Constructor: establishes conservative defaults for the disk stress
// test — sector-sized blocks, whole-disk segment, 16MiB assumed device
// cache, 100ms latency warning thresholds, 5s timeouts — and wires up
// the shared DiskBlockTable.
2466 DiskThread::DiskThread(DiskBlockTable *block_table) {
2467 read_block_size_ = kSectorSize; // default 1 sector (512 bytes)
2468 write_block_size_ = kSectorSize; // this assumes read and write block size
2470 segment_size_ = -1; // use the entire disk as one segment
2471 cache_size_ = 16 * 1024 * 1024; // assume 16MiB cache by default
2472 // Use a queue such that 3/2 times as much data as the cache can hold
2473 // is written before it is read so that there is little chance the read
2474 // data is in the cache.
2475 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
2476 blocks_per_segment_ = 32;
2478 read_threshold_ = 100000; // 100ms is a reasonable limit for
2479 write_threshold_ = 100000; // reading/writing a sector
2481 read_timeout_ = 5000000; // 5 seconds should be long enough for a
2482 write_timeout_ = 5000000; // timout for reading/writing
2484 device_sectors_ = 0;
2485 non_destructive_ = 0;
2488 block_table_ = block_table;
2489 update_block_table_ = 1;
2491 block_buffer_ = NULL;
// Destructor (body elided in this listing).
2494 DiskThread::~DiskThread() {
2497 // Set filename for device file (in /dev).
2498 void DiskThread::SetDevice(const char *device_name) {
2499 device_name_ = device_name;
2502 // Set various parameters that control the behaviour of the test.
2503 // -1 is used as a sentinel value on each parameter (except non_destructive)
2504 // to indicate that the parameter not be set.
// Validates and applies test parameters: block sizes must be multiples
// of the sector size, write blocks must be multiples of read blocks,
// thresholds must be positive. Recomputes queue_size_ from the (possibly
// new) cache/block sizes and pushes the geometry into the block table.
// Returns false (elided) on any invalid parameter.
2505 bool DiskThread::SetParameters(int read_block_size,
2506 int write_block_size,
2509 int blocks_per_segment,
2510 int64 read_threshold,
2511 int64 write_threshold,
2512 int non_destructive) {
2513 if (read_block_size != -1) {
2514 // Blocks must be aligned to the disk's sector size.
2515 if (read_block_size % kSectorSize != 0) {
2516 logprintf(0, "Process Error: Block size must be a multiple of %d "
2517 "(thread %d).\n", kSectorSize, thread_num_);
2521 read_block_size_ = read_block_size;
2524 if (write_block_size != -1) {
2525 // Write blocks must be aligned to the disk's sector size and to the
2527 if (write_block_size % kSectorSize != 0) {
2528 logprintf(0, "Process Error: Write block size must be a multiple "
2529 "of %d (thread %d).\n", kSectorSize, thread_num_);
2532 if (write_block_size % read_block_size_ != 0) {
2533 logprintf(0, "Process Error: Write block size must be a multiple "
2534 "of the read block size, which is %d (thread %d).\n",
2535 read_block_size_, thread_num_);
2539 write_block_size_ = write_block_size;
// When only the read size was given, keep the invariant
// write_block_size_ >= read_block_size_ and divisibility between them.
2542 // Make sure write_block_size_ is still valid.
2543 if (read_block_size_ > write_block_size_) {
2544 logprintf(5, "Log: Assuming write block size equal to read block size, "
2545 "which is %d (thread %d).\n", read_block_size_,
2547 write_block_size_ = read_block_size_;
2549 if (write_block_size_ % read_block_size_ != 0) {
2550 logprintf(0, "Process Error: Write block size (defined as %d) must "
2551 "be a multiple of the read block size, which is %d "
2552 "(thread %d).\n", write_block_size_, read_block_size_,
2559 if (cache_size != -1) {
2560 cache_size_ = cache_size;
2563 if (blocks_per_segment != -1) {
2564 if (blocks_per_segment <= 0) {
2565 logprintf(0, "Process Error: Blocks per segment must be greater than "
2566 "zero.\n (thread %d)", thread_num_);
2570 blocks_per_segment_ = blocks_per_segment;
2573 if (read_threshold != -1) {
2574 if (read_threshold <= 0) {
2575 logprintf(0, "Process Error: Read threshold must be greater than "
2576 "zero (thread %d).\n", thread_num_);
2580 read_threshold_ = read_threshold;
2583 if (write_threshold != -1) {
2584 if (write_threshold <= 0) {
2585 logprintf(0, "Process Error: Write threshold must be greater than "
2586 "zero (thread %d).\n", thread_num_);
2590 write_threshold_ = write_threshold;
2593 if (segment_size != -1) {
2594 // Segments must be aligned to the disk's sector size.
2595 if (segment_size % kSectorSize != 0) {
2596 logprintf(0, "Process Error: Segment size must be a multiple of %d"
2597 " (thread %d).\n", kSectorSize, thread_num_);
// Stored internally in sectors, not bytes.
2601 segment_size_ = segment_size / kSectorSize;
2604 non_destructive_ = non_destructive;
2606 // Having a queue of 150% of blocks that will fit in the disk's cache
2607 // should be enough to force out the oldest block before it is read and hence,
2608 // making sure the data comes form the disk and not the cache.
2609 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
2610 // Updating DiskBlockTable parameters
2611 if (update_block_table_) {
2612 block_table_->SetParameters(kSectorSize, write_block_size_,
2613 device_sectors_, segment_size_,
// Opens the device/file under test with O_SYNC|O_DIRECT (bypassing the
// page cache) and immediately sizes it via GetDiskSize.
2619 bool DiskThread::OpenDevice(int *pfile) {
2620 int fd = open(device_name_.c_str(),
2621 O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE,
2624 logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
2625 device_name_.c_str(), thread_num_);
2630 return GetDiskSize(fd);
2633 // Retrieves the size (in bytes) of the disk/file.
// For block devices the size comes from the BLKGETSIZE64 ioctl (the
// stat size of /dev nodes is 0); for regular files from st_size. Stores
// the result in device_sectors_ and refreshes the block table geometry.
2634 bool DiskThread::GetDiskSize(int fd) {
2635 struct stat device_stat;
2636 if (fstat(fd, &device_stat) == -1) {
2637 logprintf(0, "Process Error: Unable to fstat disk %s (thread %d).\n",
2638 device_name_.c_str(), thread_num_);
2642 // For a block device, an ioctl is needed to get the size since the size
2643 // of the device file (i.e. /dev/sdb) is 0.
2644 if (S_ISBLK(device_stat.st_mode)) {
2645 uint64 block_size = 0;
2647 if (ioctl(fd, BLKGETSIZE64, &block_size) == -1) {
2648 logprintf(0, "Process Error: Unable to ioctl disk %s (thread %d).\n",
2649 device_name_.c_str(), thread_num_);
2653 // If an Elephant is initialized with status DEAD its size will be zero.
2654 if (block_size == 0) {
2655 os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1);
2657 status_ = 1; // Avoid a procedural error.
2661 device_sectors_ = block_size / kSectorSize;
2663 } else if (S_ISREG(device_stat.st_mode)) {
2664 device_sectors_ = device_stat.st_size / kSectorSize;
2667 logprintf(0, "Process Error: %s is not a regular file or block "
2668 "device (thread %d).\n", device_name_.c_str(),
2673 logprintf(12, "Log: Device sectors: %lld on disk %s (thread %d).\n",
2674 device_sectors_, device_name_.c_str(), thread_num_);
2676 if (update_block_table_) {
2677 block_table_->SetParameters(kSectorSize, write_block_size_,
2678 device_sectors_, segment_size_,
// Close the device descriptor (body elided in this listing).
2685 bool DiskThread::CloseDevice(int fd) {
2690 // Return the time in microseconds.
// Wall-clock timestamp used for I/O latency threshold/timeout checks.
2691 int64 DiskThread::GetTime() {
2693 gettimeofday(&tv, NULL);
2694 return tv.tv_sec * 1000000 + tv.tv_usec;
// Main test loop for the disk thread: alternate write and read/verify
// phases until asked to stop, keeping enough blocks in flight to defeat
// the drive's cache.
2697 bool DiskThread::DoWork(int fd) {
2698 int64 block_num = 0;
2699 blocks_written_ = 0;
2703 if (segment_size_ == -1) {
2706 num_segments = device_sectors_ / segment_size_;
// Round up so a trailing partial segment is still covered.
2707 if (device_sectors_ % segment_size_ != 0)
2711 // Disk size should be at least 3x cache size. See comment later for
2713 sat_assert(device_sectors_ * kSectorSize > 3 * cache_size_);
2715 // This disk test works by writing blocks with a certain pattern to
2716 // disk, then reading them back and verifying it against the pattern
2717 // at a later time. A failure happens when either the block cannot
2718 // be written/read or when the read block is different than what was
2719 // written. If a block takes too long to write/read, then a warning
2720 // is given instead of an error since taking too long is not
2721 // necessarily an error.
2723 // To prevent the read blocks from coming from the disk cache,
2724 // enough blocks are written before read such that a block would
2725 // be ejected from the disk cache by the time it is read.
2727 // TODO(amistry): Implement some sort of read/write throttling. The
2728 //                flood of asynchronous I/O requests when a drive is
2729 //                unplugged is causing the application and kernel to
2730 //                become unresponsive.
2732 while (IsReadyToRun()) {
2733 // Write blocks to disk.
2734 logprintf(16, "Write phase for disk %s (thread %d).\n",
2735 device_name_.c_str(), thread_num_);
// Fill the in-flight queue (queue_size_ is sized to ~150% of the
// cache, so the oldest entry is evicted before it is re-read).
2736 while (IsReadyToRunNoPause() &&
2737 in_flight_sectors_.size() < queue_size_ + 1) {
2738 // Confine testing to a particular segment of the disk.
2739 int64 segment = (block_num / blocks_per_segment_) % num_segments;
2740 if (block_num % blocks_per_segment_ == 0) {
2741 logprintf(20, "Log: Starting to write segment %lld out of "
2742 "%lld on disk %s (thread %d).\n",
2743 segment, num_segments, device_name_.c_str(),
2748 BlockData *block = block_table_->GetUnusedBlock(segment);
2750 // If an unused sequence of sectors could not be found, skip to the
2751 // next block to process. Soon, a new segment will come and new
2752 // sectors will be able to be allocated. This effectively puts a
2753 // minimum on the disk size at 3x the stated cache size, or 48MiB
2754 // if a cache size is not given (since the cache is set as 16MiB
2755 // by default). Given that today's caches are at the low MiB range
2756 // and drive sizes at the mid GB, this shouldn't pose a problem.
2757 // The 3x minimum comes from the following:
2758 //   1. In order to allocate 'y' blocks from a segment, the
2759 //      segment must contain at least 2y blocks or else an
2760 //      allocation may not succeed.
2761 //   2. Assume the entire disk is one segment.
2762 //   3. A full write phase consists of writing blocks corresponding to
2764 //   4. Therefore, the one segment must have 2 * 3/2 * cache
2765 //      size worth of blocks = 3 * cache size worth of blocks
2767 // In non-destructive mode, don't write anything to disk.
2768 if (!non_destructive_) {
2769 if (!WriteBlockToDisk(fd, block)) {
// Failed write: return the block so it can be reallocated later.
2770 block_table_->RemoveBlock(block);
2775 block->SetBlockAsInitialized();
2778 in_flight_sectors_.push(block);
2781 // Verify blocks on disk.
2782 logprintf(20, "Read phase for disk %s (thread %d).\n",
2783 device_name_.c_str(), thread_num_);
// Drain the queue in FIFO order, oldest (cache-evicted) block first.
2784 while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) {
2785 BlockData *block = in_flight_sectors_.front();
2786 in_flight_sectors_.pop();
2787 ValidateBlockOnDisk(fd, block);
2788 block_table_->RemoveBlock(block);
2793 pages_copied_ = blocks_written_ + blocks_read_;
2797 // Do an asynchronous disk I/O operation.
// Submits a single read or write via the Linux native AIO syscalls and
// waits up to 'timeout' microseconds for completion. Returns presumably
// false on submit failure, timeout, or short/failed transfer — the elided
// return statements should be confirmed against the full source.
2798 bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
2799 int64 offset, int64 timeout) {
2800 // Use the Linux native asynchronous I/O interface for reading/writing.
2801 // A read/write consists of three basic steps:
2802 //    1. create an io context.
2803 //    2. prepare and submit an io request to the context
2804 //    3. wait for an event on the context.
// Per-operation opcode and message strings, indexed by 'op'.
2809 const char *error_str;
2811 { IOCB_CMD_PREAD, "read", "disk-read-error" },
2812 { IOCB_CMD_PWRITE, "write", "disk-write-error" }
2816 memset(&cb, 0, sizeof(cb));
2819 cb.aio_lio_opcode = operations[op].opcode;
// The ABI stores the buffer pointer as a 64-bit integer.
2820 cb.aio_buf = (__u64)buf;
2821 cb.aio_nbytes = size;
2822 cb.aio_offset = offset;
2824 struct iocb *cbs[] = { &cb };
// io_submit returns the number of iocbs queued; anything but 1 is failure.
2825 if (io_submit(aio_ctx_, 1, cbs) != 1) {
2826 logprintf(0, "Process Error: Unable to submit async %s "
2827 "on disk %s (thread %d).\n",
2828 operations[op].op_str, device_name_.c_str(),
2833 struct io_event event;
2834 memset(&event, 0, sizeof(event));
// Convert the microsecond timeout into the timespec io_getevents wants.
2836 tv.tv_sec = timeout / 1000000;
2837 tv.tv_nsec = (timeout % 1000000) * 1000;
2838 if (io_getevents(aio_ctx_, 1, 1, &event, &tv) != 1) {
2839 // A ctrl-c from the keyboard will cause io_getevents to fail with an
2840 // EINTR error code. This is not an error and so don't treat it as such,
2841 // but still log it.
2842 if (errno == EINTR) {
2843 logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n",
2844 operations[op].op_str, device_name_.c_str(),
// Anything else is treated as a hardware timeout and reported.
2847 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
2849 logprintf(0, "Hardware Error: Timeout doing async %s to sectors "
2850 "starting at %lld on disk %s (thread %d).\n",
2851 operations[op].op_str, offset / kSectorSize,
2852 device_name_.c_str(), thread_num_);
2855 // Don't bother checking return codes since io_cancel seems to always fail.
2856 // Since io_cancel is always failing, destroying and recreating an I/O
2857 // context is a workaround for canceling an in-progress I/O operation.
2858 // TODO(amistry): Find out why io_cancel isn't working and make it work.
2859 io_cancel(aio_ctx_, &cb, &event);
2860 io_destroy(aio_ctx_);
2862 if (io_setup(5, &aio_ctx_)) {
2863 logprintf(0, "Process Error: Unable to create aio context on disk %s"
2865 device_name_.c_str(), thread_num_);
2871 // event.res contains the number of bytes written/read or
2872 // error if < 0, I think.
// A short transfer (res != size) is an error even when res >= 0.
2873 if (event.res != size) {
2875 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
2877 if (event.res < 0) {
2878 switch (event.res) {
2880 logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
2881 "sectors starting at %lld on disk %s (thread %d).\n",
2882 operations[op].op_str, offset / kSectorSize,
2883 device_name_.c_str(), thread_num_);
2886 logprintf(0, "Hardware Error: Unknown error while doing %s to "
2887 "sectors starting at %lld on disk %s (thread %d).\n",
2888 operations[op].op_str, offset / kSectorSize,
2889 device_name_.c_str(), thread_num_);
2892 logprintf(0, "Hardware Error: Unable to %s to sectors starting at "
2893 "%lld on disk %s (thread %d).\n",
2894 operations[op].op_str, offset / kSectorSize,
2895 device_name_.c_str(), thread_num_);
2903 // Write a block to disk.
// Fills block_buffer_ with pattern data (from a SAT page when available,
// otherwise generated directly), records the pattern on 'block' for later
// verification, then issues a timed asynchronous write.
2904 bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
2905 memset(block_buffer_, 0, block->GetSize());
2907 // Fill block buffer with a pattern
2908 struct page_entry pe;
2909 if (!sat_->GetValid(&pe)) {
2910 // Even though a valid page could not be obtained, it is not an error
2911 // since we can always fill in a pattern directly, albeit slower.
2912 unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
2913 block->SetPattern(patternlist_->GetRandomPattern());
2915 logprintf(11, "Log: Warning, using pattern fill fallback in "
2916 "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
2917 device_name_.c_str(), thread_num_);
// Word-by-word fill from the selected pattern.
2919 for (int i = 0; i < block->GetSize()/wordsize_; i++) {
2920 memblock[i] = block->GetPattern()->pattern(i);
// Fast path: copy an already-patterned SAT page and return it afterwards.
2923 memcpy(block_buffer_, pe.addr, block->GetSize());
2924 block->SetPattern(pe.pattern);
2925 sat_->PutValid(&pe);
2928 logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
2930 block->GetSize()/kSectorSize, block->GetAddress(),
2931 device_name_.c_str(), thread_num_);
// Time the write so slow-but-successful I/O can be flagged as a warning.
2933 int64 start_time = GetTime();
2935 if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
2936 block->GetAddress() * kSectorSize, write_timeout_)) {
2940 int64 end_time = GetTime();
2941 logprintf(12, "Log: Writing time: %lld us (thread %d).\n",
2942 end_time - start_time, thread_num_);
// Exceeding the threshold is logged as a warning, not an error.
2943 if (end_time - start_time > write_threshold_) {
2944 logprintf(5, "Log: Write took %lld us which is longer than threshold "
2945 "%lld us on disk %s (thread %d).\n",
2946 end_time - start_time, write_threshold_, device_name_.c_str(),
2953 // Verify a block on disk.
// Reads the block back in randomly-sized chunks (multiples of the read
// block size) and, in destructive mode, compares each chunk against the
// pattern recorded when the block was written.
2954 bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
2955 int64 blocks = block->GetSize() / read_block_size_;
2956 int64 bytes_read = 0;
2957 int64 current_blocks;
2958 int64 current_bytes;
2959 uint64 address = block->GetAddress();
2961 logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
2963 address, device_name_.c_str(), thread_num_);
2965 // Read block from disk and time the read. If it takes longer than the
2966 // threshold, complain.
2967 if (lseek(fd, address * kSectorSize, SEEK_SET) == -1) {
2968 logprintf(0, "Process Error: Unable to seek to sector %lld in "
2969 "DiskThread::ValidateSectorsOnDisk on disk %s "
2970 "(thread %d).\n", address, device_name_.c_str(), thread_num_);
2973 int64 start_time = GetTime();
2975 // Split a large write-sized block into small read-sized blocks and
2976 // read them in groups of randomly-sized multiples of read block size.
2977 // This assures all data written on disk by this particular block
2978 // will be tested using a random reading pattern.
2980 while (blocks != 0) {
2981 // Test all read blocks in a written block.
// Random chunk size in [1, blocks] read-blocks so the access pattern
// differs from the sequential write pattern.
2982 current_blocks = (random() % blocks) + 1;
2983 current_bytes = current_blocks * read_block_size_;
2985 memset(block_buffer_, 0, current_bytes);
2987 logprintf(20, "Log: Reading %lld sectors starting at sector %lld on "
2988 "disk %s (thread %d)\n",
2989 current_bytes / kSectorSize,
2990 (address * kSectorSize + bytes_read) / kSectorSize,
2991 device_name_.c_str(), thread_num_);
2993 if (!AsyncDiskIO(ASYNC_IO_READ, fd, block_buffer_, current_bytes,
2994 address * kSectorSize + bytes_read,
2999 int64 end_time = GetTime();
3000 logprintf(20, "Log: Reading time: %lld us (thread %d).\n",
3001 end_time - start_time, thread_num_);
// Slow reads are warnings, consistent with the write path.
3002 if (end_time - start_time > read_threshold_) {
3003 logprintf(5, "Log: Read took %lld us which is longer than threshold "
3004 "%lld us on disk %s (thread %d).\n",
3005 end_time - start_time, read_threshold_,
3006 device_name_.c_str(), thread_num_);
3009 // In non-destructive mode, don't compare the block to the pattern since
3010 // the block was never written to disk in the first place.
3011 if (!non_destructive_) {
3012 if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
3014 os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
3016 logprintf(0, "Hardware Error: Pattern mismatch in block starting at "
3017 "sector %lld in DiskThread::ValidateSectorsOnDisk on "
3018 "disk %s (thread %d).\n",
3019 address, device_name_.c_str(), thread_num_);
// Advance past the chunk just verified.
3023 bytes_read += current_blocks * read_block_size_;
3024 blocks -= current_blocks;
// Thread entry point: set up the device, aligned buffer, and AIO context,
// run the test loop, then tear everything down and report totals.
3030 int DiskThread::Work() {
3033 logprintf(9, "Log: Starting disk thread %d, disk %s\n",
3034 thread_num_, device_name_.c_str());
// Seed the per-thread PRNG used for random read chunk sizes.
3036 srandom(time(NULL));
3038 if (!OpenDevice(&fd)) {
3042 // Allocate a block buffer aligned to 512 bytes since the kernel requires it
3043 // when using direct IO.
3045 int result = posix_memalign(&block_buffer_, kBufferAlignment,
3046 sat_->page_length());
3049 logprintf(0, "Process Error: Unable to allocate memory for buffers "
3050 "for disk %s (thread %d) posix memalign returned %d.\n",
3051 device_name_.c_str(), thread_num_, result);
// Create the AIO context (queue depth 5) used by AsyncDiskIO.
3056 if (io_setup(5, &aio_ctx_)) {
3057 logprintf(0, "Process Error: Unable to create aio context for disk %s"
3059 device_name_.c_str(), thread_num_);
// Cleanup: release the AIO context and the aligned buffer.
3067 io_destroy(aio_ctx_);
3069 free(block_buffer_);
3071 logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, "
3072 "%d pages copied\n",
3073 thread_num_, device_name_.c_str(), status_, pages_copied_);
// Random-read disk thread: reuses DiskThread's infrastructure but only
// reads blocks already registered in the shared block table, so it must
// not overwrite the table's geometry parameters.
3077 RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
3078 : DiskThread(block_table) {
3079 update_block_table_ = 0;
3082 RandomDiskThread::~RandomDiskThread() {
// Random-read phase: repeatedly pick an already-written block from the
// shared table and verify it, until asked to stop.
3085 bool RandomDiskThread::DoWork(int fd) {
3087 blocks_written_ = 0;
3088 logprintf(11, "Random phase for disk %s (thread %d).\n",
3089 device_name_.c_str(), thread_num_);
3090 while (IsReadyToRun()) {
3091 BlockData *block = block_table_->GetRandomBlock();
// No block may be available yet (e.g. the writer thread hasn't
// populated the table); log and continue rather than fail.
3092 if (block == NULL) {
3093 logprintf(12, "No block available for device %s (thread %d).\n",
3094 device_name_.c_str(), thread_num_);
3096 ValidateBlockOnDisk(fd, block);
// Release (not remove) so the block stays available to other readers.
3097 block_table_->ReleaseBlock(block);
3101 pages_copied_ = blocks_read_;
// Memory-region thread: tests an externally supplied memory region by
// CRC-copying SAT pages into it and checking them. Error injection is
// off by default; SetRegion() must be called before the thread runs.
3105 MemoryRegionThread::MemoryRegionThread() {
3106 error_injection_ = false;
3110 MemoryRegionThread::~MemoryRegionThread() {
// Register the memory region to test. The region is carved into
// SAT-page-sized entries that are queued for the work loop; the size
// must be an exact multiple of the SAT page length.
3115 bool MemoryRegionThread::SetRegion(void *region, int64 size) {
3116 int plength = sat_->page_length();
3117 int npages = size / plength;
3118 if (size % plength) {
3119 logprintf(0, "Process Error: region size is not a multiple of SAT "
// One queue slot per page in the region.
3125 pages_ = new PageEntryQueue(npages);
3126 char *base_addr = reinterpret_cast<char*>(region);
3127 region_ = base_addr;
// Build a page_entry for each page: its address within the region and
// its byte offset from the region base.
3128 for (int i = 0; i < npages; i++) {
3129 struct page_entry pe;
3131 pe.addr = reinterpret_cast<void*>(base_addr + i * plength);
3132 pe.offset = i * plength;
// Handle a miscompare found by CrcCopyPage/CrcCheckPage, dispatching on
// which phase detected it: copy-phase errors implicate main memory and
// are delegated to the base class; check-phase errors implicate the
// region under test and are reported here with extra detail.
3140 void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
3142 const char *message) {
3143 uint32 buffer_offset;
3144 if (phase_ == kPhaseCopy) {
3145 // If the error occurred on the Copy Phase, it means that
3146 // the source data (i.e., the main memory) is wrong. So
3147 // just pass it to the original ProcessError to call a
3149 WorkerThread::ProcessError(error, priority, message);
3150 } else if (phase_ == kPhaseCheck) {
3151 // An error on the Check Phase means that the memory region tested
3152 // has an error. Gathering more information and then reporting
3154 // Determine if this is a write or read error.
// Re-read the failing word after a cache flush to distinguish a
// transient read error from data that is genuinely wrong in memory.
3155 os_->Flush(error->vaddr);
3156 error->reread = *(error->vaddr);
3157 char *good = reinterpret_cast<char*>(&(error->expected));
3158 char *bad = reinterpret_cast<char*>(&(error->actual));
3159 sat_assert(error->expected != error->actual);
3160 unsigned int offset = 0;
// Locate the first differing byte within the word.
3161 for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
3162 if (good[offset] != bad[offset])
3166 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;
// Byte offset of the failure within the registered region.
3168 buffer_offset = error->vbyteaddr - region_;
3170 // Find physical address if possible.
3171 error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
3173 "%s: miscompare on %s, CRC check at %p(0x%llx), "
3174 "offset %llx: read:0x%016llx, reread:0x%016llx "
3175 "expected:0x%016llx\n",
3177 identifier_.c_str(),
// Any other phase value indicates an internal logic error.
3185 logprintf(0, "Process Error: memory region thread raised an "
3186 "unexpected error.");
// Thread entry point: loop pulling a page from SAT and a page from the
// region queue, CRC-copy SAT->region, CRC-check the region page, and
// return both pages to their queues. phase_ tells ProcessError which
// stage a miscompare came from. Optional single-shot error injection
// corrupts a word on loop 1 (copy check) and loop 2 (check phase).
3190 int MemoryRegionThread::Work() {
3191 struct page_entry source_pe;
3192 struct page_entry memregion_pe;
// Marker value written when injecting an artificial error.
3195 const uint64 error_constant = 0x00ba00000000ba00LL;
3197 // For error injection.
3202 logprintf(9, "Log: Starting Memory Region thread %d\n", thread_num_);
3204 while (IsReadyToRun()) {
3205 // Getting pages from SAT and queue.
3206 phase_ = kPhaseNoPhase;
3207 result &= sat_->GetValid(&source_pe);
3209 logprintf(0, "Process Error: memory region thread failed to pop "
3210 "pages from SAT, bailing\n");
3214 result &= pages_->PopRandom(&memregion_pe);
3216 logprintf(0, "Process Error: memory region thread failed to pop "
3217 "pages from queue, bailing\n");
3221 // Error injection for CRC copy.
3222 if ((sat_->error_injection() || error_injection_) && loops == 1) {
3223 addr = reinterpret_cast<int64*>(source_pe.addr);
3224 offset = random() % (sat_->page_length() / wordsize_);
// Save the original word so it can be restored after the check.
3225 data = addr[offset];
3226 addr[offset] = error_constant;
3229 // Copying SAT page into memory region.
3230 phase_ = kPhaseCopy;
3231 CrcCopyPage(&memregion_pe, &source_pe);
3232 memregion_pe.pattern = source_pe.pattern;
3234 // Error injection for CRC Check.
3235 if ((sat_->error_injection() || error_injection_) && loops == 2) {
3236 addr = reinterpret_cast<int64*>(memregion_pe.addr);
3237 offset = random() % (sat_->page_length() / wordsize_);
3238 data = addr[offset];
3239 addr[offset] = error_constant;
3242 // Checking page content in memory region.
3243 phase_ = kPhaseCheck;
3244 CrcCheckPage(&memregion_pe);
3246 phase_ = kPhaseNoPhase;
3247 // Storing pages on their proper queues.
3248 result &= sat_->PutValid(&source_pe);
3250 logprintf(0, "Process Error: memory region thread failed to push "
3251 "pages into SAT, bailing\n");
3254 result &= pages_->Push(&memregion_pe);
3256 logprintf(0, "Process Error: memory region thread failed to push "
3257 "pages into queue, bailing\n");
// Undo the injected corruption so later iterations run on clean data.
3261 if ((sat_->error_injection() || error_injection_) &&
3262 loops >= 1 && loops <= 2) {
3263 addr[offset] = data;
3270 pages_copied_ = loops;
3272 logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d "
3273 "pages checked\n", thread_num_, status_, pages_copied_);