PORT 22

[stressapptest] / src / worker.cc
diff --git a/src/worker.cc b/src/worker.cc

index 39322d22f0472b3fa3405ceeec586d7d49977a09..922d2c1ca8b9680ae2dcfe32870019922279e3f6 100644 (file)
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -44,7 +44,9 @@
  #include <sys/ioctl.h>
  #include <linux/fs.h>
  // For asynchronous I/O
-#include <linux/aio_abi.h>
+#ifdef HAVE_LIBAIO_H
+#include <libaio.h>
+#endif
  
  #include <sys/syscall.h>
  
@@ -75,40 +77,7 @@ _syscall3(int, sched_setaffinity, pid_t, pid,
            unsigned int, len, cpu_set_t*, mask)
  #endif
  
-// Linux aio syscalls.
-#if !defined(__NR_io_setup)
-#define __NR_io_setup   206
-#define __NR_io_destroy 207
-#define __NR_io_getevents       208
-#define __NR_io_submit  209
-#define __NR_io_cancel  210
-#endif
-
-#define io_setup(nr_events, ctxp) \
-  syscall(__NR_io_setup, (nr_events), (ctxp))
-#define io_submit(ctx_id, nr, iocbpp) \
-  syscall(__NR_io_submit, (ctx_id), (nr), (iocbpp))
-#define io_getevents(ctx_id, io_getevents, nr, events, timeout) \
-  syscall(__NR_io_getevents, (ctx_id), (io_getevents), (nr), (events), \
-    (timeout))
-#define io_cancel(ctx_id, iocb, result) \
-  syscall(__NR_io_cancel, (ctx_id), (iocb), (result))
-#define io_destroy(ctx) \
-  syscall(__NR_io_destroy, (ctx))
-
  namespace {
-  // Get HW core ID from cpuid instruction.
-  inline int apicid(void) {
-    int cpu;
-#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
-    __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx");
-#else
-  #warning "Unsupported CPU type: unable to determine core ID."
-    cpu = 0;
-#endif
-    return (cpu >> 24);
-  }
-
    // Work around the sad fact that there are two (gnu, xsi) incompatible
    // versions of strerror_r floating around google. Awesome.
    bool sat_strerror(int err, char *buf, int len) {
@@ -130,7 +99,7 @@ namespace {
    inline uint64 addr_to_tag(void *address) {
      return reinterpret_cast<uint64>(address);
    }
-}
+}  // namespace
  
  #if !defined(O_DIRECT)
  // Sometimes this isn't available.
@@ -157,18 +126,21 @@ static void *ThreadSpawnerGeneric(void *ptr) {
    return NULL;
  }
  
-
  void WorkerStatus::Initialize() {
    sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
    sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
+#ifdef HAVE_PTHREAD_BARRIERS
    sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL,
                                         num_workers_ + 1));
+#endif
  }
  
  void WorkerStatus::Destroy() {
    sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_));
    sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_));
+#ifdef HAVE_PTHREAD_BARRIERS
    sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
+#endif
  }
  
  void WorkerStatus::PauseWorkers() {
@@ -186,10 +158,13 @@ void WorkerStatus::StopWorkers() {
      WaitOnPauseBarrier();
  }
  
-bool WorkerStatus::ContinueRunning() {
+bool WorkerStatus::ContinueRunning(bool *paused) {
    // This loop is an optimization.  We use it to immediately re-check the status
    // after resuming from a pause, instead of returning and waiting for the next
    // call to this function.
+  if (paused) {
+    *paused = false;
+  }
    for (;;) {
      switch (GetStatus()) {
        case RUN:
@@ -200,6 +175,10 @@ bool WorkerStatus::ContinueRunning() {
          WaitOnPauseBarrier();
          // Wait for ResumeWorkers() to be called.
          WaitOnPauseBarrier();
+        // Indicate that a pause occurred.
+        if (paused) {
+          *paused = true;
+        }
          break;
        case STOP:
          return false;
@@ -233,8 +212,10 @@ void WorkerStatus::RemoveSelf() {
    AcquireNumWorkersLock();
    // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't
    // in use because (status != PAUSE).
+#ifdef HAVE_PTHREAD_BARRIERS
    sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
    sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_));
+#endif
    --num_workers_;
    ReleaseNumWorkersLock();
  
@@ -245,10 +226,10 @@ void WorkerStatus::RemoveSelf() {
  
  // Parent thread class.
  WorkerThread::WorkerThread() {
-  status_ = 0;
+  status_ = false;
    pages_copied_ = 0;
    errorcount_ = 0;
-  runduration_usec_ = 0;
+  runduration_usec_ = 1;
    priority_ = Normal;
    worker_status_ = NULL;
    thread_spawner_ = &ThreadSpawnerGeneric;
@@ -310,7 +291,7 @@ void WorkerThread::InitThread(int thread_num_init,
    patternlist_ = patternlist_init;
    worker_status_ = worker_status;
  
-  cpu_mask_ = AvailableCpus();
+  AvailableCpus(&cpu_mask_);
    tag_ = 0xffffffff;
  
    tag_mode_ = sat_->tag_mode();
@@ -321,12 +302,15 @@ void WorkerThread::InitThread(int thread_num_init,
  bool WorkerThread::InitPriority() {
    // This doesn't affect performance that much, and may not be too safe.
  
-  bool ret = BindToCpus(cpu_mask_);
+  bool ret = BindToCpus(&cpu_mask_);
    if (!ret)
-    logprintf(11, "Log: Bind to %x failed.\n", cpu_mask_);
+    logprintf(11, "Log: Bind to %s failed.\n",
+              cpuset_format(&cpu_mask_).c_str());
  
-  logprintf(11, "Log: Thread %d running on apic ID %d mask %x (%x).\n",
-            thread_num_, apicid(), CurrentCpus(), cpu_mask_);
+  logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n",
+            thread_num_, sched_getcpu(),
+            CurrentCpusFormat().c_str(),
+            cpuset_format(&cpu_mask_).c_str());
  #if 0
    if (priority_ == High) {
      sched_param param;
@@ -356,7 +340,7 @@ int WorkerThread::SpawnThread() {
      logprintf(0, "Process Error: pthread_create "
                    "failed - error %d %s\n", result,
                buf);
-    status_ += 1;
+    status_ = false;
      return false;
    }
  
@@ -365,18 +349,17 @@ int WorkerThread::SpawnThread() {
  }
  
  // Kill the worker thread with SIGINT.
-int WorkerThread::KillThread() {
-  pthread_kill(thread_, SIGINT);
-  return 0;
+bool WorkerThread::KillThread() {
+  return (pthread_kill(thread_, SIGINT) == 0);
  }
  
  // Block until thread has exited.
-int WorkerThread::JoinThread() {
+bool WorkerThread::JoinThread() {
    int result = pthread_join(thread_, NULL);
  
    if (result) {
      logprintf(0, "Process Error: pthread_join failed - error %d\n", result);
-    status_ = 0;
+    status_ = false;
    }
  
    // 0 is pthreads success.
@@ -394,14 +377,14 @@ void WorkerThread::StartRoutine() {
  
  
  // Thread work loop. Execute until marked finished.
-int WorkerThread::Work() {
+bool WorkerThread::Work() {
    do {
      logprintf(9, "Log: ...\n");
      // Sleep for 1 second.
      sat_sleep(1);
    } while (IsReadyToRun());
  
-  return 0;
+  return false;
  }
  
  
@@ -409,11 +392,13 @@ int WorkerThread::Work() {
  // Conceptually, each bit represents a logical CPU, ie:
  //   mask = 3  (11b):   cpu0, 1
  //   mask = 13 (1101b): cpu0, 2, 3
-uint32 WorkerThread::AvailableCpus() {
-  cpu_set_t curr_cpus;
-  CPU_ZERO(&curr_cpus);
-  sched_getaffinity(getppid(), sizeof(curr_cpus), &curr_cpus);
-  return cpuset_to_uint32(&curr_cpus);
+bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) {
+  CPU_ZERO(cpuset);
+#ifdef HAVE_SCHED_GETAFFINITY
+  return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0;
+#else
+  return 0;
+#endif
  }
  
  
@@ -421,11 +406,13 @@ uint32 WorkerThread::AvailableCpus() {
  // Conceptually, each bit represents a logical CPU, ie:
  //   mask = 3  (11b):   cpu0, 1
  //   mask = 13 (1101b): cpu0, 2, 3
-uint32 WorkerThread::CurrentCpus() {
-  cpu_set_t curr_cpus;
-  CPU_ZERO(&curr_cpus);
-  sched_getaffinity(0, sizeof(curr_cpus), &curr_cpus);
-  return cpuset_to_uint32(&curr_cpus);
+bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) {
+  CPU_ZERO(cpuset);
+#ifdef HAVE_SCHED_GETAFFINITY
+  return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0;
+#else
+  return 0;
+#endif
  }
  
  
@@ -437,21 +424,26 @@ uint32 WorkerThread::CurrentCpus() {
  //                  mask = 13 (1101b): cpu0, 2, 3
  //
  //   Returns true on success, false otherwise.
-bool WorkerThread::BindToCpus(uint32 thread_mask) {
-  uint32 process_mask = AvailableCpus();
-  if (thread_mask == process_mask)
+bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) {
+  cpu_set_t process_mask;
+  AvailableCpus(&process_mask);
+  if (cpuset_isequal(thread_mask, &process_mask))
      return true;
  
-  logprintf(11, "Log: available CPU mask - %x\n", process_mask);
-  if ((thread_mask | process_mask) != process_mask) {
+  logprintf(11, "Log: available CPU mask - %s\n",
+            cpuset_format(&process_mask).c_str());
+  if (!cpuset_issubset(thread_mask, &process_mask)) {
      // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist.
-    logprintf(0, "Log: requested CPUs %x not a subset of available %x\n",
-              thread_mask, process_mask);
+    logprintf(0, "Log: requested CPUs %s not a subset of available %s\n",
+              cpuset_format(thread_mask).c_str(),
+              cpuset_format(&process_mask).c_str());
      return false;
    }
-  cpu_set_t cpuset;
-  cpuset_from_uint32(thread_mask, &cpuset);
-  return (sched_setaffinity(gettid(), sizeof(cpuset), &cpuset) == 0);
+#ifdef HAVE_SCHED_GETAFFINITY
+  return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0);
+#else
+  return 0;
+#endif
  }
  
  
@@ -533,8 +525,8 @@ bool FillThread::FillPageRandom(struct page_entry *pe) {
  
  
  // Memory fill work loop. Execute until alloted pages filled.
-int FillThread::Work() {
-  int result = 1;
+bool FillThread::Work() {
+  bool result = true;
  
    logprintf(9, "Log: Starting fill thread %d\n", thread_num_);
  
@@ -544,7 +536,7 @@ int FillThread::Work() {
    struct page_entry pe;
    int64 loops = 0;
    while (IsReadyToRun() && (loops < num_pages_to_fill_)) {
-    result &= sat_->GetEmpty(&pe);
+    result = result && sat_->GetEmpty(&pe);
      if (!result) {
        logprintf(0, "Process Error: fill_thread failed to pop pages, "
                  "bailing\n");
@@ -552,11 +544,11 @@ int FillThread::Work() {
      }
  
      // Fill the page with pattern
-    result &= FillPageRandom(&pe);
+    result = result && FillPageRandom(&pe);
      if (!result) break;
  
      // Put the page back on the queue.
-    result &= sat_->PutValid(&pe);
+    result = result && sat_->PutValid(&pe);
      if (!result) {
        logprintf(0, "Process Error: fill_thread failed to push pages, "
                  "bailing\n");
@@ -570,7 +562,7 @@ int FillThread::Work() {
    status_ = result;
    logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n",
              thread_num_, status_, pages_copied_);
-  return 0;
+  return result;
  }
  
  
@@ -580,8 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
                                  const char *message) {
    char dimm_string[256] = "";
  
-  int apic_id = apicid();
-  uint32 cpumask = CurrentCpus();
+  int core_id = sched_getcpu();
  
    // Determine if this is a write or read error.
    os_->Flush(error->vaddr);
@@ -613,11 +604,11 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
                                                (error->vaddr), 1);
  
      logprintf(priority,
-              "%s: miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
+              "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
                "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
                message,
-              apic_id,
-              cpumask,
+              core_id,
+              CurrentCpusFormat().c_str(),
                error->vaddr,
                error->paddr,
                dimm_string,
@@ -816,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr,
        if ((state == kGoodAgain) || (state == kBad)) {
          unsigned int blockerrors = badend - badstart + 1;
          errormessage = "Block Error";
+        // It's okay for the 1st entry to be corrected multiple times,
+        // it will simply be reported twice. Once here and once below
+        // when processing the error queue.
          ProcessError(&recorded[0], 0, errormessage.c_str());
          logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
                    "%d bytes from offset 0x%x to 0x%x\n",
@@ -824,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr,
                    blockerrors * wordsize_,
                    offset + badstart * wordsize_,
                    offset + badend * wordsize_);
-        errorcount_ += blockerrors;
-        return blockerrors;
        }
      }
    }
@@ -841,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr,
  
    if (page_error) {
      // For each word in the data region.
-    int error_recount = 0;
      for (int i = 0; i < length / wordsize_; i++) {
        uint64 actual = memblock[i];
        uint64 expected;
@@ -860,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr,
  
        // If the value is incorrect, save an error record for later printing.
        if (actual != expected) {
-        if (error_recount < kErrorLimit) {
-          // We already reported these.
-          error_recount++;
-        } else {
-          // If we have overflowed the error queue, print the errors now.
-          struct ErrorRecord er;
-          er.actual = actual;
-          er.expected = expected;
-          er.vaddr = &memblock[i];
-
-          // Do the error printout. This will take a long time and
-          // likely change the machine state.
-          ProcessError(&er, 12, errormessage.c_str());
-          overflowerrors++;
-        }
+        // If we have overflowed the error queue, print the errors now.
+        struct ErrorRecord er;
+        er.actual = actual;
+        er.expected = expected;
+        er.vaddr = &memblock[i];
+
+        // Do the error printout. This will take a long time and
+        // likely change the machine state.
+        ProcessError(&er, 12, errormessage.c_str());
+        overflowerrors++;
        }
      }
    }
@@ -949,8 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
    char tag_dimm_string[256] = "";
    bool read_error = false;
  
-  int apic_id = apicid();
-  uint32 cpumask = CurrentCpus();
+  int core_id = sched_getcpu();
  
    // Determine if this is a write or read error.
    os_->Flush(error->vaddr);
@@ -978,14 +963,14 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
    if (priority < 5) {
      logprintf(priority,
                "%s: Tag from %p(0x%llx:%s) (%s) "
-              "miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
+              "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
                "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
                message,
                error->tagvaddr, error->tagpaddr,
                tag_dimm_string,
                read_error ? "read error" : "write error",
-              apic_id,
-              cpumask,
+              core_id,
+              CurrentCpusFormat().c_str(),
                error->vaddr,
                error->paddr,
                dimm_string,
@@ -1090,6 +1075,52 @@ bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64,
    return true;
  }
  
+// x86_64 SSE2 assembly implementation of Adler memory copy, with address
+// tagging added as a second step. This is useful for debugging failures
+// that only occur when SSE / nontemporal writes are used.
+bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
+                                       uint64 *srcmem64,
+                                       unsigned int size_in_bytes,
+                                       AdlerChecksum *checksum,
+                                       struct page_entry *pe) {
+  // Do ASM copy, ignore checksum.
+  AdlerChecksum ignored_checksum;
+  os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);
+
+  // Force cache flush of both the source and destination addresses.
+  //  length - length of block to flush in cachelines.
+  //  mem_increment - number of dstmem/srcmem values per cacheline.
+  int length = size_in_bytes / kCacheLineSize;
+  int mem_increment = kCacheLineSize / sizeof(*dstmem64);
+  OsLayer::FastFlushSync();
+  for (int i = 0; i < length; ++i) {
+    OsLayer::FastFlushHint(dstmem64 + (i * mem_increment));
+    OsLayer::FastFlushHint(srcmem64 + (i * mem_increment));
+  }
+  OsLayer::FastFlushSync();
+
+  // Check results.
+  AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
+  // Patch up address tags.
+  TagAddrC(dstmem64, size_in_bytes);
+  return true;
+}
+
+// Retag pages..
+bool WorkerThread::TagAddrC(uint64 *memwords,
+                            unsigned int size_in_bytes) {
+  // Mask is the bitmask of indexes used by the pattern.
+  // It is the pattern size -1. Size is always a power of 2.
+
+  // Select tag or data as appropriate.
+  int length = size_in_bytes / wordsize_;
+  for (int i = 0; i < length; i += 8) {
+    datacast_t data;
+    data.l64 = addr_to_tag(&memwords[i]);
+    memwords[i] = data.l64;
+  }
+  return true;
+}
  
  // C implementation of Adler memory crc.
  bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
@@ -1122,15 +1153,12 @@ bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
        if (data.l64 != src_tag)
          ReportTagError(&srcmem64[i], data.l64, src_tag);
  
-
        data.l32.l = pattern->pattern(i << 1);
        data.l32.h = pattern->pattern((i << 1) + 1);
        a1 = a1 + data.l32.l;
        b1 = b1 + a1;
        a1 = a1 + data.l32.h;
        b1 = b1 + a1;
-
-
      } else {
        data.l64 = srcmem64[i];
        a1 = a1 + data.l32.l;
@@ -1201,19 +1229,20 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
                                     blocksize,
                                     currentblock * blocksize, 0);
            if (errorcount == 0) {
-            int apic_id = apicid();
-            uint32 cpumask = CurrentCpus();
-            logprintf(0, "Process Error: CPU %d(0x%x) CrcCopyPage "
+            int core_id = sched_getcpu();
+            logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
                           "CRC mismatch %s != %s, "
                           "but no miscompares found on second pass.\n",
-                      apic_id, cpumask,
+                      core_id, CurrentCpusFormat().c_str(),
                        crc.ToHexString().c_str(),
                        expectedcrc->ToHexString().c_str());
              struct ErrorRecord er;
              er.actual = sourcemem[0];
-            er.expected = 0x0;
+            er.expected = 0xbad00000ull << 32;
              er.vaddr = sourcemem;
              ProcessError(&er, 0, "Hardware Error");
+            errors += 1;
+            errorcount_ ++;
            }
          }
        }
@@ -1317,7 +1346,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
  
      AdlerChecksum crc;
      if (tag_mode_) {
-      AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
+      AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe);
      } else {
        os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc);
      }
@@ -1332,10 +1361,10 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                     blocksize,
                                     currentblock * blocksize, 0);
        if (errorcount == 0) {
-        logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, "
+        logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != actual: %s, "
                       "but no miscompares found. Retrying with fresh data.\n",
-                  crc.ToHexString().c_str(),
-                  expectedcrc->ToHexString().c_str());
+                  expectedcrc->ToHexString().c_str(),
+                  crc.ToHexString().c_str() );
          if (!tag_mode_) {
            // Copy the data originally read from this region back again.
            // This data should have any corruption read originally while
@@ -1346,10 +1375,20 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                     blocksize,
                                     currentblock * blocksize, 0);
            if (errorcount == 0) {
-            logprintf(0, "Process Error: CrcWarmCopyPage CRC mismatch %s "
-                         "!= %s, but no miscompares found on second pass.\n",
+            int core_id = sched_getcpu();
+            logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage "
+                         "CRC mismatch %s != %s, "
+                         "but no miscompares found on second pass.\n",
+                      core_id, CurrentCpusFormat().c_str(),
                        crc.ToHexString().c_str(),
                        expectedcrc->ToHexString().c_str());
+            struct ErrorRecord er;
+            er.actual = sourcemem[0];
+            er.expected = 0xbad;
+            er.vaddr = sourcemem;
+            ProcessError(&er, 0, "Hardware Error");
+            errors ++;
+            errorcount_ ++;
            }
          }
        }
@@ -1388,23 +1427,23 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
  
  
  // Memory check work loop. Execute until done, then exhaust pages.
-int CheckThread::Work() {
+bool CheckThread::Work() {
    struct page_entry pe;
-  int result = 1;
+  bool result = true;
    int64 loops = 0;
  
    logprintf(9, "Log: Starting Check thread %d\n", thread_num_);
  
    // We want to check all the pages, and
    // stop when there aren't any left.
-  while (1) {
-    result &= sat_->GetValid(&pe);
+  while (true) {
+    result = result && sat_->GetValid(&pe);
      if (!result) {
        if (IsReadyToRunNoPause())
          logprintf(0, "Process Error: check_thread failed to pop pages, "
                    "bailing\n");
        else
-        result = 1;
+        result = true;
        break;
      }
  
@@ -1414,9 +1453,9 @@ int CheckThread::Work() {
      // Push pages back on the valid queue if we are still going,
      // throw them out otherwise.
      if (IsReadyToRunNoPause())
-      result &= sat_->PutValid(&pe);
+      result = result && sat_->PutValid(&pe);
      else
-      result &= sat_->PutEmpty(&pe);
+      result = result && sat_->PutEmpty(&pe);
      if (!result) {
        logprintf(0, "Process Error: check_thread failed to push pages, "
                  "bailing\n");
@@ -1429,24 +1468,24 @@ int CheckThread::Work() {
    status_ = result;
    logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
  }
  
  
  // Memory copy work loop. Execute until marked done.
-int CopyThread::Work() {
+bool CopyThread::Work() {
    struct page_entry src;
    struct page_entry dst;
-  int result = 1;
+  bool result = true;
    int64 loops = 0;
  
-  logprintf(9, "Log: Starting copy thread %d: cpu %x, mem %x\n",
-            thread_num_, cpu_mask_, tag_);
+  logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n",
+            thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_);
  
    while (IsReadyToRun()) {
      // Pop the needed pages.
-    result &= sat_->GetValid(&src, tag_);
-    result &= sat_->GetEmpty(&dst, tag_);
+    result = result && sat_->GetValid(&src, tag_);
+    result = result && sat_->GetEmpty(&dst, tag_);
      if (!result) {
        logprintf(0, "Process Error: copy_thread failed to pop pages, "
                  "bailing\n");
@@ -1472,8 +1511,8 @@ int CopyThread::Work() {
        dst.pattern = src.pattern;
      }
  
-    result &= sat_->PutValid(&dst);
-    result &= sat_->PutEmpty(&src);
+    result = result && sat_->PutValid(&dst);
+    result = result && sat_->PutEmpty(&src);
  
      // Copy worker-threads yield themselves at the end of each copy loop,
      // to avoid threads from preempting each other in the middle of the inner
@@ -1494,20 +1533,20 @@ int CopyThread::Work() {
    status_ = result;
    logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
  }
  
  // Memory invert work loop. Execute until marked done.
-int InvertThread::Work() {
+bool InvertThread::Work() {
    struct page_entry src;
-  int result = 1;
+  bool result = true;
    int64 loops = 0;
  
    logprintf(9, "Log: Starting invert thread %d\n", thread_num_);
  
    while (IsReadyToRun()) {
      // Pop the needed pages.
-    result &= sat_->GetValid(&src);
+    result = result && sat_->GetValid(&src);
      if (!result) {
        logprintf(0, "Process Error: invert_thread failed to pop pages, "
                  "bailing\n");
@@ -1533,7 +1572,7 @@ int InvertThread::Work() {
      if (sat_->strict())
        CrcCheckPage(&src);
  
-    result &= sat_->PutValid(&src);
+    result = result && sat_->PutValid(&src);
      if (!result) {
        logprintf(0, "Process Error: invert_thread failed to push pages, "
                  "bailing\n");
@@ -1546,7 +1585,7 @@ int InvertThread::Work() {
    status_ = result;
    logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
  }
  
  
@@ -1558,24 +1597,26 @@ void FileThread::SetFile(const char *filename_init) {
  
  // Open the file for access.
  bool FileThread::OpenFile(int *pfile) {
-  int fd = open(filename_.c_str(),
-                O_RDWR | O_CREAT | O_SYNC | O_DIRECT,
-                0644);
+  int flags = O_RDWR | O_CREAT | O_SYNC;
+  int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
+  if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
+    fd = open(filename_.c_str(), flags, 0644);  // Try without O_DIRECT
+    os_->ActivateFlushPageCache();  // Not using O_DIRECT fixed EINVAL
+  }
    if (fd < 0) {
      logprintf(0, "Process Error: Failed to create file %s!!\n",
                filename_.c_str());
      pages_copied_ = 0;
-    status_ = 0;
-    return 0;
+    return false;
    }
    *pfile = fd;
-  return 1;
+  return true;
  }
  
  // Close the file.
  bool FileThread::CloseFile(int fd) {
    close(fd);
-  return 1;
+  return true;
  }
  
  // Check sector tagging.
@@ -1615,7 +1656,7 @@ bool FileThread::WritePages(int fd) {
    int strict = sat_->strict();
  
    // Start fresh at beginning of file for each batch of pages.
-  lseek(fd, 0, SEEK_SET);
+  lseek64(fd, 0, SEEK_SET);
    for (int i = 0; i < sat_->disk_pages(); i++) {
      struct page_entry src;
      if (!GetValidPage(&src))
@@ -1638,7 +1679,7 @@ bool FileThread::WritePages(int fd) {
      if (!result)
        return false;
    }
-  return true;
+  return os_->FlushPageCache();  // If O_DIRECT worked, this will be a NOP.
  }
  
  // Copy data from file into memory block.
@@ -1765,12 +1806,17 @@ bool FileThread::PagePrepare() {
  
    // Init a local buffer if we need it.
    if (!page_io_) {
+#ifdef HAVE_POSIX_MEMALIGN
      int result = posix_memalign(&local_page_, 512, sat_->page_length());
+#else
+    local_page_ = memalign(512, sat_->page_length());
+    int result = (local_page_ == 0);
+#endif
      if (result) {
        logprintf(0, "Process Error: disk thread posix_memalign "
                     "returned %d (fail)\n",
                  result);
-      status_ += 1;
+      status_ = false;
        return false;
      }
    }
@@ -1839,17 +1885,14 @@ bool FileThread::PutValidPage(struct page_entry *src) {
    return true;
  }
  
-
-
  // Copy data from file into memory blocks.
  bool FileThread::ReadPages(int fd) {
    int page_length = sat_->page_length();
    int strict = sat_->strict();
-  int result = 1;
-
+  bool result = true;
  
    // Read our data back out of the file, into it's new location.
-  lseek(fd, 0, SEEK_SET);
+  lseek64(fd, 0, SEEK_SET);
    for (int i = 0; i < sat_->disk_pages(); i++) {
      struct page_entry dst;
      if (!GetEmptyPage(&dst))
@@ -1888,11 +1931,9 @@ bool FileThread::ReadPages(int fd) {
    return result;
  }
  
-
  // File IO work loop. Execute until marked done.
-int FileThread::Work() {
-  int result = 1;
-  int fileresult = 1;
+bool FileThread::Work() {
+  bool result = true;
    int64 loops = 0;
  
    logprintf(9, "Log: Starting file thread %d, file %s, device %s\n",
@@ -1900,30 +1941,34 @@ int FileThread::Work() {
              filename_.c_str(),
              devicename_.c_str());
  
-  if (!PagePrepare())
-    return 0;
+  if (!PagePrepare()) {
+    status_ = false;
+    return false;
+  }
  
    // Open the data IO file.
    int fd = 0;
-  if (!OpenFile(&fd))
-    return 0;
+  if (!OpenFile(&fd)) {
+    status_ = false;
+    return false;
+  }
  
    pass_ = 0;
  
    // Load patterns into page records.
    page_recs_ = new struct PageRec[sat_->disk_pages()];
    for (int i = 0; i < sat_->disk_pages(); i++) {
-    page_recs_[i].pattern = new struct Pattern();
+    page_recs_[i].pattern = new class Pattern();
    }
  
    // Loop until done.
    while (IsReadyToRun()) {
      // Do the file write.
-    if (!(fileresult &= WritePages(fd)))
+    if (!(result = result && WritePages(fd)))
        break;
  
      // Do the file read.
-    if (!(fileresult &= ReadPages(fd)))
+    if (!(result = result && ReadPages(fd)))
        break;
  
      loops++;
@@ -1931,7 +1976,6 @@ int FileThread::Work() {
    }
  
    pages_copied_ = loops * sat_->disk_pages();
-  status_ = result;
  
    // Clean up.
    CloseFile(fd);
@@ -1939,7 +1983,10 @@ int FileThread::Work() {
  
    logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  // Failure to read from device indicates hardware,
+  // rather than procedural SW error.
+  status_ = true;
+  return true;
  }
  
  bool NetworkThread::IsNetworkStopSet() {
@@ -1965,7 +2012,7 @@ bool NetworkThread::CreateSocket(int *psocket) {
    if (sock == -1) {
      logprintf(0, "Process Error: Cannot open socket\n");
      pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
      return false;
    }
    *psocket = sock;
@@ -1989,7 +2036,7 @@ bool NetworkThread::Connect(int sock) {
    if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) {
      logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_);
      pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
      return false;
    }
  
@@ -1997,7 +2044,7 @@ bool NetworkThread::Connect(int sock) {
                      sizeof(struct sockaddr))) {
      logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_);
      pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
      return false;
    }
    return true;
@@ -2013,12 +2060,12 @@ bool NetworkListenThread::Listen() {
    sa.sin_addr.s_addr = INADDR_ANY;
    sa.sin_port = htons(kNetworkPort);
  
-  if (-1 == bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) {
+  if (-1 == ::bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) {
      char buf[256];
      sat_strerror(errno, buf, sizeof(buf));
      logprintf(0, "Process Error: Cannot bind socket: %s\n", buf);
      pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
      return false;
    }
    listen(sock_, 3);
@@ -2052,13 +2099,14 @@ bool NetworkListenThread::GetConnection(int *pnewsock) {
    if (newsock < 0)  {
      logprintf(0, "Process Error: Did not receive connection\n");
      pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
      return false;
    }
    *pnewsock = newsock;
    return true;
  }
  
+// Send a page, return false if a page was not sent.
  bool NetworkThread::SendPage(int sock, struct page_entry *src) {
    int page_length = sat_->page_length();
    char *address = static_cast<char*>(src->addr);
@@ -2074,6 +2122,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) {
          logprintf(0, "Process Error: Thread %d, "
                       "Network write failed, bailing. (%s)\n",
                    thread_num_, buf);
+        status_ = false;
        }
        return false;
      }
@@ -2082,7 +2131,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) {
    return true;
  }
  
-
+// Receive a page. Return false if a page was not received.
  bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
    int page_length = sat_->page_length();
    char *address = static_cast<char*>(dst->addr);
@@ -2099,7 +2148,7 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
          if (transferred == 0 && err == 0) {
            // Two system setups will not sync exactly,
            // allow early exit, but log it.
-          logprintf(0, "Log: Net thread did not recieve any data, exitting.\n");
+          logprintf(0, "Log: Net thread did not receive any data, exiting.\n");
          } else {
            char buf[256] = "";
            sat_strerror(err, buf, sizeof(buf));
@@ -2107,6 +2156,7 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
            logprintf(0, "Process Error: Thread %d, "
                         "Network read failed, bailing (%s).\n",
                      thread_num_, buf);
+          status_ = false;
            // Print arguments and results.
            logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n",
                      sock, address + (page_length - size),
@@ -2129,9 +2179,9 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
    return true;
  }
  
-
  // Network IO work loop. Execute until marked done.
-int NetworkThread::Work() {
+// Return true if the thread ran as expected.
+bool NetworkThread::Work() {
    logprintf(9, "Log: Starting network thread %d, ip %s\n",
              thread_num_,
              ipaddr_);
@@ -2139,7 +2189,7 @@ int NetworkThread::Work() {
    // Make a socket.
    int sock = 0;
    if (!CreateSocket(&sock))
-    return 0;
+    return false;
  
    // Network IO loop requires network slave thread to have already initialized.
    // We will sleep here for awhile to ensure that the slave thread will be
@@ -2153,17 +2203,17 @@ int NetworkThread::Work() {
  
    // Connect to a slave thread.
    if (!Connect(sock))
-    return 0;
+    return false;
  
    // Loop until done.
-  int result = 1;
+  bool result = true;
    int strict = sat_->strict();
    int64 loops = 0;
    while (IsReadyToRun()) {
      struct page_entry src;
      struct page_entry dst;
-    result &= sat_->GetValid(&src);
-    result &= sat_->GetEmpty(&dst);
+    result = result && sat_->GetValid(&src);
+    result = result && sat_->GetEmpty(&dst);
      if (!result) {
        logprintf(0, "Process Error: net_thread failed to pop pages, "
                  "bailing\n");
@@ -2175,14 +2225,14 @@ int NetworkThread::Work() {
        CrcCheckPage(&src);
  
      // Do the network write.
-    if (!(result &= SendPage(sock, &src)))
+    if (!(result = result && SendPage(sock, &src)))
        break;
  
      // Update pattern reference to reflect new contents.
      dst.pattern = src.pattern;
  
      // Do the network read.
-    if (!(result &= ReceivePage(sock, &dst)))
+    if (!(result = result && ReceivePage(sock, &dst)))
        break;
  
      // Ensure that the transfer ended up with correct data.
@@ -2190,8 +2240,8 @@ int NetworkThread::Work() {
        CrcCheckPage(&dst);
  
      // Return all of our pages to the queue.
-    result &= sat_->PutValid(&dst);
-    result &= sat_->PutEmpty(&src);
+    result = result && sat_->PutValid(&dst);
+    result = result && sat_->PutEmpty(&src);
      if (!result) {
        logprintf(0, "Process Error: net_thread failed to push pages, "
                  "bailing\n");
@@ -2209,7 +2259,7 @@ int NetworkThread::Work() {
    logprintf(9, "Log: Completed %d: network thread status %d, "
                 "%d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
  }
  
  // Spawn slave threads for incoming connections.
@@ -2234,7 +2284,7 @@ bool NetworkListenThread::ReapSlaves() {
    // Gather status and reap threads.
    logprintf(12, "Log: Joining all outstanding threads\n");
  
-  for (int i = 0; i < child_workers_.size(); i++) {
+  for (size_t i = 0; i < child_workers_.size(); i++) {
      NetworkSlaveThread& child_thread = child_workers_[i]->thread;
      logprintf(12, "Log: Joining slave thread %d\n", i);
      child_thread.JoinThread();
@@ -2253,15 +2303,17 @@ bool NetworkListenThread::ReapSlaves() {
  }
  
  // Network listener IO work loop. Execute until marked done.
-int NetworkListenThread::Work() {
-  int result = 1;
+// Return false on fatal software error.
+bool NetworkListenThread::Work() {
    logprintf(9, "Log: Starting network listen thread %d\n",
              thread_num_);
  
    // Make a socket.
    sock_ = 0;
-  if (!CreateSocket(&sock_))
-    return 0;
+  if (!CreateSocket(&sock_)) {
+    status_ = false;
+    return false;
+  }
    logprintf(9, "Log: Listen thread created sock\n");
  
    // Allows incoming connections to be queued up by socket library.
@@ -2296,12 +2348,12 @@ int NetworkListenThread::Work() {
  
    CloseSocket(sock_);
  
-  status_ = result;
+  status_ = true;
    logprintf(9,
              "Log: Completed %d: network listen thread status %d, "
              "%d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return 1;
+  return true;
  }
  
  // Set network reflector socket struct.
@@ -2310,25 +2362,33 @@ void NetworkSlaveThread::SetSock(int sock) {
  }
  
  // Network reflector IO work loop. Execute until marked done.
-int NetworkSlaveThread::Work() {
+// Return false on fatal software error.
+bool NetworkSlaveThread::Work() {
    logprintf(9, "Log: Starting network slave thread %d\n",
              thread_num_);
  
    // Verify that we have a socket.
    int sock = sock_;
-  if (!sock)
-    return 0;
+  if (!sock) {
+    status_ = false;
+    return false;
+  }
  
    // Loop until done.
    int64 loops = 0;
    // Init a local buffer for storing data.
    void *local_page = NULL;
+#ifdef HAVE_POSIX_MEMALIGN
    int result = posix_memalign(&local_page, 512, sat_->page_length());
+#else
+  local_page = memalign(512, sat_->page_length());
+  int result = (local_page == 0);
+#endif
    if (result) {
      logprintf(0, "Process Error: net slave posix_memalign "
                   "returned %d (fail)\n",
                result);
-    status_ += 1;
+    status_ = false;
      return false;
    }
  
@@ -2351,7 +2411,7 @@ int NetworkSlaveThread::Work() {
  
    pages_copied_ = loops;
    // No results provided from this type of thread.
-  status_ = 1;
+  status_ = true;
  
    // Clean up.
    CloseSocket(sock);
@@ -2360,11 +2420,11 @@ int NetworkSlaveThread::Work() {
              "Log: Completed %d: network slave thread status %d, "
              "%d pages copied\n",
              thread_num_, status_, pages_copied_);
-  return status_;
+  return true;
  }
  
  // Thread work loop. Execute until marked finished.
-int ErrorPollThread::Work() {
+bool ErrorPollThread::Work() {
    logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_);
  
    // This calls a generic error polling function in the Os abstraction layer.
@@ -2375,12 +2435,13 @@ int ErrorPollThread::Work() {
  
    logprintf(9, "Log: Finished system error poll thread %d: %d errors\n",
              thread_num_, errorcount_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
  }
  
  // Worker thread to heat up CPU.
-int CpuStressThread::Work() {
+// This thread does not evaluate pass/fail or software error.
+bool CpuStressThread::Work() {
    logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_);
  
    do {
@@ -2391,28 +2452,50 @@ int CpuStressThread::Work() {
  
    logprintf(9, "Log: Finished CPU stress thread %d:\n",
              thread_num_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
  }
  
  CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
                                                   int cacheline_count,
                                                   int thread_num,
+                                                 int thread_count,
                                                   int inc_count) {
    cc_cacheline_data_ = data;
    cc_cacheline_count_ = cacheline_count;
    cc_thread_num_ = thread_num;
+  cc_thread_count_ = thread_count;
    cc_inc_count_ = inc_count;
  }
  
+// A very simple psuedorandom generator.  Since the random number is based
+// on only a few simple logic operations, it can be done quickly in registers
+// and the compiler can inline it.
+uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) {
+  return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial);
+}
+
  // Worked thread to test the cache coherency of the CPUs
-int CpuCacheCoherencyThread::Work() {
+// Return false on fatal sw error.
+bool CpuCacheCoherencyThread::Work() {
    logprintf(9, "Log: Starting the Cache Coherency thread %d\n",
              cc_thread_num_);
    uint64 time_start, time_end;
    struct timeval tv;
  
+  // Use a slightly more robust random number for the initial
+  // value, so the random sequences from the simple generator will
+  // be more divergent.
+#ifdef HAVE_RAND_R
    unsigned int seed = static_cast<unsigned int>(gettid());
+  uint64 r = static_cast<uint64>(rand_r(&seed));
+  r |= static_cast<uint64>(rand_r(&seed)) << 32;
+#else
+  srand(time(NULL));
+  uint64 r = static_cast<uint64>(rand());  // NOLINT
+  r |= static_cast<uint64>(rand()) << 32;  // NOLINT
+#endif
+
    gettimeofday(&tv, NULL);  // Get the timestamp before increments.
    time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
  
@@ -2422,10 +2505,19 @@ int CpuCacheCoherencyThread::Work() {
        // Choose a datastructure in random and increment the appropriate
        // member in that according to the offset (which is the same as the
        // thread number.
-      int r = rand_r(&seed);
-      r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
+      r = SimpleRandom(r);
+      int cline_num = r % cc_cacheline_count_;
+      int offset;
+      // Reverse the order for odd numbered threads in odd numbered cache
+      // lines.  This is designed for massively multi-core systems where the
+      // number of cores exceeds the bytes in a cache line, so "distant" cores
+      // get a chance to exercize cache coherency between them.
+      if (cline_num & cc_thread_num_ & 1)
+        offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+      else
+        offset = cc_thread_num_;
        // Increment the member of the randomely selected structure.
-      (cc_cacheline_data_[r].num[cc_thread_num_])++;
+      (cc_cacheline_data_[cline_num].num[offset])++;
      }
  
      total_inc += cc_inc_count_;
@@ -2434,14 +2526,26 @@ int CpuCacheCoherencyThread::Work() {
      // in all the cache line structures for this particular thread.
      int cc_global_num = 0;
      for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
-      cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
+      int offset;
+      // Perform the same offset calculation from above.
+      if (cline_num & cc_thread_num_ & 1)
+        offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+      else
+        offset = cc_thread_num_;
+      cc_global_num += cc_cacheline_data_[cline_num].num[offset];
        // Reset the cachline member's value for the next run.
-      cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
+      cc_cacheline_data_[cline_num].num[offset] = 0;
      }
      if (sat_->error_injection())
        cc_global_num = -1;
  
-    if (cc_global_num != cc_inc_count_) {
+    // Since the count is only stored in a byte, to squeeze more into a
+    // single cache line, only compare it as a byte.  In the event that there
+    // is something detected, the chance that it would be missed by a single
+    // thread is 1 in 256.  If it affects all cores, that makes the chance
+    // of it being missed terribly minute.  It seems unlikely any failure
+    // case would be off by more than a small number.
+    if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) {
        errorcount_++;
        logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
                  cc_global_num, cc_inc_count_);
@@ -2459,8 +2563,8 @@ int CpuCacheCoherencyThread::Work() {
              cc_thread_num_, us_elapsed, total_inc, inc_rate);
    logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n",
              cc_thread_num_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
  }
  
  DiskThread::DiskThread(DiskBlockTable *block_table) {
@@ -2484,14 +2588,21 @@ DiskThread::DiskThread(DiskBlockTable *block_table) {
    device_sectors_ = 0;
    non_destructive_ = 0;
  
+#ifdef HAVE_LIBAIO_H
    aio_ctx_ = 0;
+#endif
    block_table_ = block_table;
    update_block_table_ = 1;
  
    block_buffer_ = NULL;
+
+  blocks_written_ = 0;
+  blocks_read_ = 0;
  }
  
  DiskThread::~DiskThread() {
+  if (block_buffer_)
+    free(block_buffer_);
  }
  
  // Set filename for device file (in /dev).
@@ -2616,10 +2727,14 @@ bool DiskThread::SetParameters(int read_block_size,
    return true;
  }
  
+// Open a device, return false on failure.
  bool DiskThread::OpenDevice(int *pfile) {
-  int fd = open(device_name_.c_str(),
-                O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE,
-                0);
+  int flags = O_RDWR | O_SYNC | O_LARGEFILE;
+  int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
+  if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
+    fd = open(device_name_.c_str(), flags, 0);  // Try without O_DIRECT
+    os_->ActivateFlushPageCache();
+  }
    if (fd < 0) {
      logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
                device_name_.c_str(), thread_num_);
@@ -2631,6 +2746,7 @@ bool DiskThread::OpenDevice(int *pfile) {
  }
  
  // Retrieves the size (in bytes) of the disk/file.
+// Return false on failure.
  bool DiskThread::GetDiskSize(int fd) {
    struct stat device_stat;
    if (fstat(fd, &device_stat) == -1) {
@@ -2650,11 +2766,11 @@ bool DiskThread::GetDiskSize(int fd) {
        return false;
      }
  
-    // If an Elephant is initialized with status DEAD its size will be zero.
+    // Zero size indicates nonworking device..
      if (block_size == 0) {
        os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1);
        ++errorcount_;
-      status_ = 1;  // Avoid a procedural error.
+      status_ = true;  // Avoid a procedural error.
        return false;
      }
  
@@ -2694,10 +2810,11 @@ int64 DiskThread::GetTime() {
    return tv.tv_sec * 1000000 + tv.tv_usec;
  }
  
+// Do randomized reads and (possibly) writes on a device.
+// Return false on fatal SW error, true on SW success,
+// regardless of whether HW failed.
  bool DiskThread::DoWork(int fd) {
    int64 block_num = 0;
-  blocks_written_ = 0;
-  blocks_read_ = 0;
    int64 num_segments;
  
    if (segment_size_ == -1) {
@@ -2731,13 +2848,16 @@ bool DiskThread::DoWork(int fd) {
  
    while (IsReadyToRun()) {
      // Write blocks to disk.
-    logprintf(16, "Write phase for disk %s (thread %d).\n",
+    logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n",
+              non_destructive_ ? "(disabled) " : "",
                device_name_.c_str(), thread_num_);
      while (IsReadyToRunNoPause() &&
-           in_flight_sectors_.size() < queue_size_ + 1) {
+           in_flight_sectors_.size() <
+               static_cast<size_t>(queue_size_ + 1)) {
        // Confine testing to a particular segment of the disk.
        int64 segment = (block_num / blocks_per_segment_) % num_segments;
-      if (block_num % blocks_per_segment_ == 0) {
+      if (!non_destructive_ &&
+          (block_num % blocks_per_segment_ == 0)) {
          logprintf(20, "Log: Starting to write segment %lld out of "
                    "%lld on disk %s (thread %d).\n",
                    segment, num_segments, device_name_.c_str(),
@@ -2768,23 +2888,28 @@ bool DiskThread::DoWork(int fd) {
        if (!non_destructive_) {
          if (!WriteBlockToDisk(fd, block)) {
            block_table_->RemoveBlock(block);
-          continue;
+          return true;
          }
+        blocks_written_++;
        }
  
-      block->SetBlockAsInitialized();
+      // Block is either initialized by writing, or in nondestructive case,
+      // initialized by being added into the datastructure for later reading.
+      block->initialized();
  
-      blocks_written_++;
        in_flight_sectors_.push(block);
      }
+    if (!os_->FlushPageCache())  // If O_DIRECT worked, this will be a NOP.
+      return false;
  
      // Verify blocks on disk.
-    logprintf(20, "Read phase for disk %s (thread %d).\n",
+    logprintf(20, "Log: Read phase for disk %s (thread %d).\n",
                device_name_.c_str(), thread_num_);
      while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) {
        BlockData *block = in_flight_sectors_.front();
        in_flight_sectors_.pop();
-      ValidateBlockOnDisk(fd, block);
+      if (!ValidateBlockOnDisk(fd, block))
+        return true;
        block_table_->RemoveBlock(block);
        blocks_read_++;
      }
@@ -2795,8 +2920,10 @@ bool DiskThread::DoWork(int fd) {
  }
  
  // Do an asynchronous disk I/O operation.
+// Return false if the IO is not set up.
  bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
                              int64 offset, int64 timeout) {
+#ifdef HAVE_LIBAIO_H
    // Use the Linux native asynchronous I/O interface for reading/writing.
    // A read/write consists of three basic steps:
    //    1. create an io context.
@@ -2808,8 +2935,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
      const char *op_str;
      const char *error_str;
    } operations[2] = {
-    { IOCB_CMD_PREAD, "read", "disk-read-error" },
-    { IOCB_CMD_PWRITE, "write", "disk-write-error" }
+    { IO_CMD_PREAD, "read", "disk-read-error" },
+    { IO_CMD_PWRITE, "write", "disk-write-error" }
    };
  
    struct iocb cb;
@@ -2817,16 +2944,19 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
  
    cb.aio_fildes = fd;
    cb.aio_lio_opcode = operations[op].opcode;
-  cb.aio_buf = (__u64)buf;
-  cb.aio_nbytes = size;
-  cb.aio_offset = offset;
+  cb.u.c.buf = buf;
+  cb.u.c.nbytes = size;
+  cb.u.c.offset = offset;
  
    struct iocb *cbs[] = { &cb };
    if (io_submit(aio_ctx_, 1, cbs) != 1) {
+    int error = errno;
+    char buf[256];
+    sat_strerror(error, buf, sizeof(buf));
      logprintf(0, "Process Error: Unable to submit async %s "
-                 "on disk %s (thread %d).\n",
+                 "on disk %s (thread %d). Error %d, %s\n",
                operations[op].op_str, device_name_.c_str(),
-              thread_num_);
+              thread_num_, error, buf);
      return false;
    }
  
@@ -2839,7 +2969,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
      // A ctrl-c from the keyboard will cause io_getevents to fail with an
      // EINTR error code.  This is not an error and so don't treat it as such,
      // but still log it.
-    if (errno == EINTR) {
+    int error = errno;
+    if (error == EINTR) {
        logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n",
                  operations[op].op_str, device_name_.c_str(),
                  thread_num_);
@@ -2860,9 +2991,12 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
      io_destroy(aio_ctx_);
      aio_ctx_ = 0;
      if (io_setup(5, &aio_ctx_)) {
+      int error = errno;
+      char buf[256];
+      sat_strerror(error, buf, sizeof(buf));
        logprintf(0, "Process Error: Unable to create aio context on disk %s"
-                " (thread %d).\n",
-                device_name_.c_str(), thread_num_);
+                " (thread %d) Error %d, %s\n",
+                device_name_.c_str(), thread_num_, error, buf);
      }
  
      return false;
@@ -2870,12 +3004,13 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
  
    // event.res contains the number of bytes written/read or
    // error if < 0, I think.
-  if (event.res != size) {
+  if (event.res != static_cast<uint64>(size)) {
      errorcount_++;
      os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
  
-    if (event.res < 0) {
-      switch (event.res) {
+    int64 result = static_cast<int64>(event.res);
+    if (result < 0) {
+      switch (result) {
          case -EIO:
            logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
                         "sectors starting at %lld on disk %s (thread %d).\n",
@@ -2898,11 +3033,15 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
    }
  
    return true;
+#else  // !HAVE_LIBAIO_H
+  return false;
+#endif
  }
  
  // Write a block to disk.
+// Return false if the block is not written.
  bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
-  memset(block_buffer_, 0, block->GetSize());
+  memset(block_buffer_, 0, block->size());
  
    // Fill block buffer with a pattern
    struct page_entry pe;
@@ -2910,30 +3049,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
      // Even though a valid page could not be obatined, it is not an error
      // since we can always fill in a pattern directly, albeit slower.
      unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
-    block->SetPattern(patternlist_->GetRandomPattern());
+    block->set_pattern(patternlist_->GetRandomPattern());
  
      logprintf(11, "Log: Warning, using pattern fill fallback in "
                    "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
                device_name_.c_str(), thread_num_);
  
-    for (int i = 0; i < block->GetSize()/wordsize_; i++) {
-      memblock[i] = block->GetPattern()->pattern(i);
+    for (unsigned int i = 0; i < block->size()/wordsize_; i++) {
+      memblock[i] = block->pattern()->pattern(i);
      }
    } else {
-    memcpy(block_buffer_, pe.addr, block->GetSize());
-    block->SetPattern(pe.pattern);
+    memcpy(block_buffer_, pe.addr, block->size());
+    block->set_pattern(pe.pattern);
      sat_->PutValid(&pe);
    }
  
    logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
              " (thread %d).\n",
-            block->GetSize()/kSectorSize, block->GetAddress(),
+            block->size()/kSectorSize, block->address(),
              device_name_.c_str(), thread_num_);
  
    int64 start_time = GetTime();
  
-  if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
-                   block->GetAddress() * kSectorSize, write_timeout_)) {
+  if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(),
+                   block->address() * kSectorSize, write_timeout_)) {
      return false;
    }
  
@@ -2951,12 +3090,14 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
  }
  
  // Verify a block on disk.
+// Return true if the block was read, also increment errorcount
+// if the block had data errors or performance problems.
  bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
-  int64 blocks = block->GetSize() / read_block_size_;
+  int64 blocks = block->size() / read_block_size_;
    int64 bytes_read = 0;
    int64 current_blocks;
    int64 current_bytes;
-  uint64 address = block->GetAddress();
+  uint64 address = block->address();
  
    logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
              "(thread %d).\n",
@@ -2964,7 +3105,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
  
    // Read block from disk and time the read.  If it takes longer than the
    // threshold, complain.
-  if (lseek(fd, address * kSectorSize, SEEK_SET) == -1) {
+  if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) {
      logprintf(0, "Process Error: Unable to seek to sector %lld in "
                "DiskThread::ValidateSectorsOnDisk on disk %s "
                "(thread %d).\n", address, device_name_.c_str(), thread_num_);
@@ -2976,7 +3117,6 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
    // read them in groups of randomly-sized multiples of read block size.
    // This assures all data written on disk by this particular block
    // will be tested using a random reading pattern.
-
    while (blocks != 0) {
      // Test all read blocks in a written block.
      current_blocks = (random() % blocks) + 1;
@@ -3009,7 +3149,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
      // In non-destructive mode, don't compare the block to the pattern since
      // the block was never written to disk in the first place.
      if (!non_destructive_) {
-      if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
+      if (CheckRegion(block_buffer_, block->pattern(), current_bytes,
                        0, bytes_read)) {
          os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
          errorcount_ += 1;
@@ -3027,7 +3167,9 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
    return true;
  }
  
-int DiskThread::Work() {
+// Direct device access thread.
+// Return false on software error.
+bool DiskThread::Work() {
    int fd;
  
    logprintf(9, "Log: Starting disk thread %d, disk %s\n",
@@ -3036,42 +3178,52 @@ int DiskThread::Work() {
    srandom(time(NULL));
  
    if (!OpenDevice(&fd)) {
-    return 0;
+    status_ = false;
+    return false;
    }
  
    // Allocate a block buffer aligned to 512 bytes since the kernel requires it
-  // when using direst IO.
-
-  int result = posix_memalign(&block_buffer_, kBufferAlignment,
-                              sat_->page_length());
-  if (result) {
+  // when using direct IO.
+#ifdef HAVE_POSIX_MEMALIGN
+  int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
+                                       sat_->page_length());
+#else
+  block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
+  int memalign_result = (block_buffer_ == 0);
+#endif
+  if (memalign_result) {
      CloseDevice(fd);
      logprintf(0, "Process Error: Unable to allocate memory for buffers "
                   "for disk %s (thread %d) posix memalign returned %d.\n",
-              device_name_.c_str(), thread_num_, result);
-    status_ += 1;
+              device_name_.c_str(), thread_num_, memalign_result);
+    status_ = false;
      return false;
    }
  
+#ifdef HAVE_LIBAIO_H
    if (io_setup(5, &aio_ctx_)) {
+    CloseDevice(fd);
      logprintf(0, "Process Error: Unable to create aio context for disk %s"
                " (thread %d).\n",
                device_name_.c_str(), thread_num_);
-    return 0;
+    status_ = false;
+    return false;
    }
+#endif
  
-  DoWork(fd);
+  bool result = DoWork(fd);
  
-  status_ = 1;
+  status_ = result;
  
+#ifdef HAVE_LIBAIO_H
    io_destroy(aio_ctx_);
+#endif
    CloseDevice(fd);
-  free(block_buffer_);
  
    logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, "
                 "%d pages copied\n",
              thread_num_, device_name_.c_str(), status_, pages_copied_);
-  return 1;
+  return result;
  }
  
  RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
@@ -3082,15 +3234,14 @@ RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
  RandomDiskThread::~RandomDiskThread() {
  }
  
+// Workload for random disk thread.
  bool RandomDiskThread::DoWork(int fd) {
-  blocks_read_ = 0;
-  blocks_written_ = 0;
-  logprintf(11, "Random phase for disk %s (thread %d).\n",
+  logprintf(11, "Log: Random phase for disk %s (thread %d).\n",
              device_name_.c_str(), thread_num_);
    while (IsReadyToRun()) {
      BlockData *block = block_table_->GetRandomBlock();
      if (block == NULL) {
-      logprintf(12, "No block available for device %s (thread %d).\n",
+      logprintf(12, "Log: No block available for device %s (thread %d).\n",
                  device_name_.c_str(), thread_num_);
      } else {
        ValidateBlockOnDisk(fd, block);
@@ -3112,6 +3263,8 @@ MemoryRegionThread::~MemoryRegionThread() {
      delete pages_;
  }
  
+// Set a region of memory or MMIO to be tested.
+// Return false if region could not be mapped.
  bool MemoryRegionThread::SetRegion(void *region, int64 size) {
    int plength = sat_->page_length();
    int npages = size / plength;
@@ -3137,6 +3290,8 @@ bool MemoryRegionThread::SetRegion(void *region, int64 size) {
    }
  }
  
+// More detailed error printout for hardware errors in memory or MMIO
+// regions.
  void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
                                        int priority,
                                        const char *message) {
@@ -3187,10 +3342,12 @@ void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
    }
  }
  
-int MemoryRegionThread::Work() {
+// Workload for testion memory or MMIO regions.
+// Return false on software error.
+bool MemoryRegionThread::Work() {
    struct page_entry source_pe;
    struct page_entry memregion_pe;
-  int result = 1;
+  bool result = true;
    int64 loops = 0;
    const uint64 error_constant = 0x00ba00000000ba00LL;
  
@@ -3204,14 +3361,14 @@ int MemoryRegionThread::Work() {
    while (IsReadyToRun()) {
      // Getting pages from SAT and queue.
      phase_ = kPhaseNoPhase;
-    result &= sat_->GetValid(&source_pe);
+    result = result && sat_->GetValid(&source_pe);
      if (!result) {
        logprintf(0, "Process Error: memory region thread failed to pop "
                  "pages from SAT, bailing\n");
        break;
      }
  
-    result &= pages_->PopRandom(&memregion_pe);
+    result = result && pages_->PopRandom(&memregion_pe);
      if (!result) {
        logprintf(0, "Process Error: memory region thread failed to pop "
                  "pages from queue, bailing\n");
@@ -3245,13 +3402,13 @@ int MemoryRegionThread::Work() {
  
      phase_ = kPhaseNoPhase;
      // Storing pages on their proper queues.
-    result &= sat_->PutValid(&source_pe);
+    result = result && sat_->PutValid(&source_pe);
      if (!result) {
        logprintf(0, "Process Error: memory region thread failed to push "
                  "pages into SAT, bailing\n");
        break;
      }
-    result &= pages_->Push(&memregion_pe);
+    result = result && pages_->Push(&memregion_pe);
      if (!result) {
        logprintf(0, "Process Error: memory region thread failed to push "
                  "pages into queue, bailing\n");
@@ -3271,5 +3428,226 @@ int MemoryRegionThread::Work() {
    status_ = result;
    logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d "
              "pages checked\n", thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
+}
+
+// The list of MSRs to read from each cpu.
+const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = {
+  { kMsrTscAddr, "TSC" },
+  { kMsrAperfAddr, "APERF" },
+  { kMsrMperfAddr, "MPERF" },
+};
+
+CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round)
+  : num_cpus_(num_cpus),
+    freq_threshold_(freq_threshold),
+    round_(round) {
+  sat_assert(round >= 0);
+  if (round == 0) {
+    // If rounding is off, force rounding to the nearest MHz.
+    round_ = 1;
+    round_value_ = 0.5;
+  } else {
+    round_value_ = round/2.0;
+  }
+}
+
+CpuFreqThread::~CpuFreqThread() {
+}
+
+// Compute the difference between the currently read MSR values and the
+// previously read values and store the results in delta. If any of the
+// values did not increase, or the TSC value is too small, returns false.
+// Otherwise, returns true.
+bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                                 CpuDataType *delta) {
+  // Loop through the msrs.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (previous->msrs[msr] > current->msrs[msr]) {
+      logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx "
+                "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr],
+                current->msrs[msr]);
+      return false;
+    } else {
+      delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr];
+    }
+  }
+
+  // Check for TSC < 1 Mcycles over interval.
+  if (delta->msrs[kMsrTsc] < (1000 * 1000)) {
+    logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n");
+    return false;
+  }
+  timersub(&current->tv, &previous->tv, &delta->tv);
+
+  return true;
+}
+
+// Compute the change in values of the MSRs between current and previous,
+// set the frequency in MHz of the cpu. If there is an error computing
+// the delta, return false. Othewise, return true.
+bool CpuFreqThread::ComputeFrequency(CpuDataType *current,
+                                     CpuDataType *previous, int *freq) {
+  CpuDataType delta;
+  if (!ComputeDelta(current, previous, &delta)) {
+    return false;
+  }
+
+  double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0;
+  double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000
+                     * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval;
+
+  // Use the rounding value to round up properly.
+  int computed = static_cast<int>(frequency + round_value_);
+  *freq = computed - (computed % round_);
+  return true;
+}
+
+// This is the task function that the thread executes.
+bool CpuFreqThread::Work() {
+  cpu_set_t cpuset;
+  if (!AvailableCpus(&cpuset)) {
+    logprintf(0, "Process Error: Cannot get information about the cpus.\n");
+    return false;
+  }
+
+  // Start off indicating the test is passing.
+  status_ = true;
+
+  int curr = 0;
+  int prev = 1;
+  uint32 num_intervals = 0;
+  bool paused = false;
+  bool valid;
+  bool pass = true;
+
+  vector<CpuDataType> data[2];
+  data[0].resize(num_cpus_);
+  data[1].resize(num_cpus_);
+  while (IsReadyToRun(&paused)) {
+    if (paused) {
+      // Reset the intervals and restart logic after the pause.
+      num_intervals = 0;
+    }
+    if (num_intervals == 0) {
+      // If this is the first interval, then always wait a bit before
+      // starting to collect data.
+      sat_sleep(kStartupDelay);
+    }
+
+    // Get the per cpu counters.
+    valid = true;
+    for (int cpu = 0; cpu < num_cpus_; cpu++) {
+      if (CPU_ISSET(cpu, &cpuset)) {
+        if (!GetMsrs(cpu, &data[curr][cpu])) {
+          logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
+          valid = false;
+          break;
+        }
+      }
+    }
+    if (!valid) {
+      // Reset the number of collected intervals since something bad happened.
+      num_intervals = 0;
+      continue;
+    }
+
+    num_intervals++;
+
+    // Only compute a delta when we have at least two intervals worth of data.
+    if (num_intervals > 2) {
+      for (int cpu = 0; cpu < num_cpus_; cpu++) {
+        if (CPU_ISSET(cpu, &cpuset)) {
+          int freq;
+          if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
+                                &freq)) {
+            // Reset the number of collected intervals since an unknown
+            // error occurred.
+            logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
+            num_intervals = 0;
+            break;
+          }
+          logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
+          if (freq < freq_threshold_) {
+            errorcount_++;
+            pass = false;
+            logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
+                      "threshold %d MHz.\n", cpu, freq, freq_threshold_);
+          }
+        }
+      }
+    }
+
+    sat_sleep(kIntervalPause);
+
+    // Swap the values in curr and prev (these values flip between 0 and 1).
+    curr ^= 1;
+    prev ^= 1;
+  }
+
+  return pass;
+}
+
+
+// Get the MSR values for this particular cpu and save them in data. If
+// any error is encountered, returns false. Otherwise, returns true.
+bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) {
+      return false;
+    }
+  }
+  // Save the time at which we acquired these values.
+  gettimeofday(&data->tv, NULL);
+
+  return true;
+}
+
+// Returns true if this test can run on the current machine. Otherwise,
+// returns false.
+bool CpuFreqThread::CanRun() {
+#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+  unsigned int eax, ebx, ecx, edx;
+
+  // Check that the TSC feature is supported.
+  // This check is valid for both Intel and AMD.
+  eax = 1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (!(edx & (1 << 5))) {
+    logprintf(0, "Process Error: No TSC support.\n");
+    return false;
+  }
+
+  // Check the highest extended function level supported.
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000000;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x80000007) {
+    logprintf(0, "Process Error: No invariant TSC support.\n");
+    return false;
+  }
+
+  // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000007;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((edx & (1 << 8)) == 0) {
+    logprintf(0, "Process Error: No non-stop TSC support.\n");
+    return false;
+  }
+
+  // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+  // This check is valid for both Intel and AMD.
+  eax = 0x6;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((ecx & 1) == 0) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+  return true;
+#else
+  logprintf(0, "Process Error: "
+               "cpu_freq_test is only supported on X86 processors.\n");
+  return false;
+#endif
  }