From eea0aac32a3f522ea51d389f44dcd8abcfc5a6e0 Mon Sep 17 00:00:00 2001
From: "nick.j.sanders" <nick.j.sanders@93e54ea4-8218-11de-8aaf-8d8425684b44>
Date: Fri, 12 Mar 2010 03:35:04 +0000
Subject: [PATCH] Update stressapptest to 1.0.2 * Fix -d option on 32 bit
 build. * Fix -d bandwidth calculation * DiskThread general cleanup * Add
 libaio dependency for cross platform builds * Allow > 32 cores * Add support
 for --tag_mode and -W on the same run * General error handling cleanup *
 Improve checksum generation failure handling - printout core, node, dimm. *
 -H option specifies minimum required hugepage allocation

---
 configure                     | 286 ++++++++++++++++++++-
 configure.ac                  |  34 ++-
 src/os.cc                     |  32 ++-
 src/os.h                      |  42 +--
 src/os_factory.cc             |   1 -
 src/sat.cc                    |  70 ++---
 src/sat.h                     |   1 +
 src/sattypes.h                |  75 ++++--
 src/stressapptest_config.h.in |   6 +
 src/worker.cc                 | 469 +++++++++++++++++++---------------
 src/worker.h                  |  88 ++++---
 11 files changed, 782 insertions(+), 322 deletions(-)

diff --git a/configure b/configure
index 1619c7c..eda792f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for stressapptest 1.0.1_autoconf.
+# Generated by GNU Autoconf 2.61 for stressapptest 1.0.2_autoconf.
 #
 # Report bugs to <opensource@google.com>.
 #
@@ -574,8 +574,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='stressapptest'
 PACKAGE_TARNAME='stressapptest'
-PACKAGE_VERSION='1.0.1_autoconf'
-PACKAGE_STRING='stressapptest 1.0.1_autoconf'
+PACKAGE_VERSION='1.0.2_autoconf'
+PACKAGE_STRING='stressapptest 1.0.2_autoconf'
 PACKAGE_BUGREPORT='opensource@google.com'
 
 ac_unique_file="src/"
@@ -1228,7 +1228,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures stressapptest 1.0.1_autoconf to adapt to many kinds of systems.
+\`configure' configures stressapptest 1.0.2_autoconf to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1299,7 +1299,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of stressapptest 1.0.1_autoconf:";;
+     short | recursive ) echo "Configuration of stressapptest 1.0.2_autoconf:";;
    esac
   cat <<\_ACEOF
 
@@ -1385,7 +1385,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-stressapptest configure 1.0.1_autoconf
+stressapptest configure 1.0.2_autoconf
 generated by GNU Autoconf 2.61
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1399,7 +1399,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by stressapptest $as_me 1.0.1_autoconf, which was
+It was created by stressapptest $as_me 1.0.2_autoconf, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   $ $0 $@
@@ -2331,7 +2331,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='stressapptest'
- VERSION='1.0.1_autoconf'
+ VERSION='1.0.2_autoconf'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -5112,6 +5112,152 @@ fi
 done
 
 
+
+for ac_header in pthread.h libaio.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header:     check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to opensource@google.com ##
+## ------------------------------------ ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
 # Checks for typedefs, structures, and compiler characteristics.
 { echo "$as_me:$LINENO: checking for stdbool.h that conforms to C99" >&5
 echo $ECHO_N "checking for stdbool.h that conforms to C99... $ECHO_C" >&6; }
@@ -5991,6 +6137,126 @@ echo "${ECHO_T}$pthread_arg" >&6; }
    LDFLAGS="$LDFLAGS $pthread_arg"
 fi
 
+# Checking for libaio
+libaio_arg="not_available"
+{ echo "$as_me:$LINENO: checking which argument is required to compile libaio" >&5
+echo $ECHO_N "checking which argument is required to compile libaio... $ECHO_C" >&6; }
+
+libaio_header="#include<libaio.h>"
+libaio_body="io_submit(0,0,0)"
+# Check if compile with no extra argument
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$libaio_header
+int
+main ()
+{
+$libaio_body
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext &&
+       $as_test_x conftest$ac_exeext; then
+  libaio_arg=""
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+
+if test x"$libaio_arg" = x"not_available"; then
+  bkp_LDFLAGS="$LDFLAGS"
+  for altheader in -laio; do
+    LDFLAGS="$bkp_LDFLAGS $altheader"
+    cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$libaio_header
+int
+main ()
+{
+$libaio_body
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext &&
+       $as_test_x conftest$ac_exeext; then
+  libaio_arg="$altheader"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS="$bkp_LDFLAGS"
+  done
+fi
+
+if test x"$libaio_arg" = x"not_available"; then
+   { { echo "$as_me:$LINENO: error: Cannot find libaio library, please install libaio-dev
+See \`config.log' for more details." >&5
+echo "$as_me: error: Cannot find libaio library, please install libaio-dev
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }
+else
+   if test x"$libaio_arg" = x; then
+     { echo "$as_me:$LINENO: result: none" >&5
+echo "${ECHO_T}none" >&6; }
+   else
+     { echo "$as_me:$LINENO: result: $libaio_arg" >&5
+echo "${ECHO_T}$libaio_arg" >&6; }
+   fi
+   LDFLAGS="$LDFLAGS $libaio_arg"
+fi
+
 # Checks for library functions.
 { echo "$as_me:$LINENO: checking whether closedir returns void" >&5
 echo $ECHO_N "checking whether closedir returns void... $ECHO_C" >&6; }
@@ -7396,7 +7662,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by stressapptest $as_me 1.0.1_autoconf, which was
+This file was extended by stressapptest $as_me 1.0.2_autoconf, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -7449,7 +7715,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-stressapptest config.status 1.0.1_autoconf
+stressapptest config.status 1.0.2_autoconf
 configured by $0, generated by GNU Autoconf 2.61,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
diff --git a/configure.ac b/configure.ac
index a7bdb86..5011c89 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
 AC_PREREQ(2.61)
-AC_INIT(stressapptest, 1.0.1_autoconf, opensource@google.com)
+AC_INIT(stressapptest, 1.0.2_autoconf, opensource@google.com)
 
 AC_CANONICAL_HOST
 AC_CANONICAL_BUILD
@@ -87,6 +87,7 @@ CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops  -funroll-loops -DNDEBUG"
 AC_HEADER_DIRENT
 AC_HEADER_STDC
 AC_CHECK_HEADERS([arpa/inet.h fcntl.h malloc.h netdb.h stdint.h stdlib.h string.h sys/ioctl.h sys/socket.h sys/time.h unistd.h])
+AC_CHECK_HEADERS([pthread.h libaio.h])
 
 # Checks for typedefs, structures, and compiler characteristics.
 AC_HEADER_STDBOOL
@@ -132,6 +133,37 @@ else
    LDFLAGS="$LDFLAGS $pthread_arg"
 fi
 
+# Checking for libaio
+libaio_arg="not_available"
+AC_MSG_CHECKING([which argument is required to compile libaio])
+
+libaio_header="#include<libaio.h>"
+libaio_body="io_submit(0,0,0)"
+# Check if compile with no extra argument
+AC_LINK_IFELSE([AC_LANG_PROGRAM($libaio_header, $libaio_body)],
+libaio_arg="")
+
+if test x"$libaio_arg" = x"not_available"; then
+  bkp_LDFLAGS="$LDFLAGS"
+  for altheader in -laio; do
+    LDFLAGS="$bkp_LDFLAGS $altheader"
+    AC_LINK_IFELSE([AC_LANG_PROGRAM($libaio_header, $libaio_body)],
+    libaio_arg="$altheader")
+    LDFLAGS="$bkp_LDFLAGS"
+  done
+fi
+
+if test x"$libaio_arg" = x"not_available"; then
+   AC_MSG_FAILURE([Cannot find libaio library, please install libaio-dev])
+else
+   if test x"$libaio_arg" = x; then
+     AC_MSG_RESULT([none])
+   else
+     AC_MSG_RESULT([$libaio_arg])
+   fi
+   LDFLAGS="$LDFLAGS $libaio_arg"
+fi
+
 # Checks for library functions.
 AC_FUNC_CLOSEDIR_VOID
 AC_PROG_GCC_TRADITIONAL
diff --git a/src/os.cc b/src/os.cc
index 89947b7..4784028 100644
--- a/src/os.cc
+++ b/src/os.cc
@@ -52,6 +52,7 @@ OsLayer::OsLayer() {
   testmem_ = 0;
   testmemsize_ = 0;
   totalmemsize_ = 0;
+  min_hugepages_bytes_ = 0;
   error_injection_ = false;
   normal_mem_ = true;
   time_initialized_ = 0;
@@ -178,13 +179,22 @@ cpu_set_t *OsLayer::FindCoreMask(int32 region) {
     for (int i = 0; i < num_cpus_per_node_; ++i) {
       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
     }
-    logprintf(5, "Log: Region %d mask 0x%08X\n",
-                 region, cpuset_to_uint32(&cpu_sets_[region]));
     cpu_sets_valid_[region] = true;
+    logprintf(5, "Log: Region %d mask 0x%s\n",
+                 region, FindCoreMaskFormat(region).c_str());
   }
   return &cpu_sets_[region];
 }
 
+// Return cores associated with a given region in hex string.
+string OsLayer::FindCoreMaskFormat(int32 region) {
+  cpu_set_t* mask = FindCoreMask(region);
+  string format = cpuset_format(mask);
+  if (format.size() < 8)
+    format = string(8 - format.size(), '0') + format;
+  return format;
+}
+
 // Report an error in an easily parseable way.
 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
   time_t now = time(NULL);
@@ -246,16 +256,20 @@ int64 OsLayer::FindFreeMemSize() {
   }
 
   // We want to leave enough stuff for things to run.
-  // If more than 2GB is present, leave 192M + 5% for other stuff.
+  // If the user specified a minimum amount of memory to expect, require that.
+  // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
   // If less than 2GB is present use 85% of what's available.
   // These are fairly arbitrary numbers that seem to work OK.
   //
   // TODO(nsanders): is there a more correct way to determine target
   // memory size?
-  if (physsize < 2048LL * kMegabyte)
+  if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
+    minsize = min_hugepages_bytes_;
+  } else if (physsize < 2048LL * kMegabyte) {
     minsize = ((pages * 85) / 100) * pagesize;
-  else
+  } else {
     minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+  }
 
   // Use hugepage sizing if available.
   if (hugepagesize > 0) {
@@ -325,10 +339,16 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
     if (shmaddr == reinterpret_cast<void*>(-1)) {
       int err = errno;
       char errtxt[256] = "";
-      shmctl(shmid, IPC_RMID, NULL);
       strerror_r(err, errtxt, sizeof(errtxt));
       logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n",
                 err, errtxt);
+      if (shmctl(shmid, IPC_RMID, NULL) < 0) {
+        int err = errno;
+        char errtxt[256] = "";
+        strerror_r(err, errtxt, sizeof(errtxt));
+        logprintf(0, "Log: failed to remove shared mem object - err %d (%s).\n",
+                  err, errtxt);
+      }
       goto hugepage_failover;
     }
     use_hugepages_ = true;
diff --git a/src/os.h b/src/os.h
index 5faa84d..9ed04d5 100644
--- a/src/os.h
+++ b/src/os.h
@@ -50,6 +50,12 @@ class OsLayer {
   OsLayer();
   virtual ~OsLayer();
 
+  // Set the minimum amount of hugepages that should be available for testing.
+  // Must be set before Initialize().
+  void SetMinimumHugepagesSize(int64 min_bytes) {
+    min_hugepages_bytes_ = min_bytes;
+  }
+
   // Initializes data strctures and open files.
   // Returns false on error.
   virtual bool Initialize();
@@ -75,6 +81,8 @@ class OsLayer {
   virtual int32 FindRegion(uint64 paddr);
   // Find cpu cores associated with a region. Either NUMA or arbitrary.
   virtual cpu_set_t *FindCoreMask(int32 region);
+  // Return cpu cores associated with a region in a hex string.
+  virtual string FindCoreMaskFormat(int32 region);
 
   // Returns the HD device that contains this file.
   virtual string FindFileDevice(string filename);
@@ -228,21 +236,22 @@ class OsLayer {
   ErrCallback get_err_log_callback() { return err_log_callback_; }
 
  protected:
-  void *testmem_;            // Location of test memory.
-  int64 testmemsize_;        // Size of test memory.
-  int64 totalmemsize_;       // Size of available memory.
-  bool  error_injection_;    // Do error injection?
-  bool  normal_mem_;         // Memory DMA capable?
-  bool  use_hugepages_;      // Use hugepage shmem?
-  int   shmid_;              // Handle to shmem
-
-  int64 regionsize_;         // Size of memory "regions"
-  int   regioncount_;        // Number of memory "regions"
-  int   num_cpus_;           // Number of cpus in the system.
-  int   num_nodes_;          // Number of nodes in the system.
-  int   num_cpus_per_node_;  // Number of cpus per node in the system.
-
-  time_t time_initialized_;  // Start time of test.
+  void *testmem_;                // Location of test memory.
+  int64 testmemsize_;            // Size of test memory.
+  int64 totalmemsize_;           // Size of available memory.
+  int64 min_hugepages_bytes_;    // Minimum hugepages size.
+  bool  error_injection_;        // Do error injection?
+  bool  normal_mem_;             // Memory DMA capable?
+  bool  use_hugepages_;          // Use hugepage shmem?
+  int   shmid_;                  // Handle to shmem
+
+  int64 regionsize_;             // Size of memory "regions"
+  int   regioncount_;            // Number of memory "regions"
+  int   num_cpus_;               // Number of cpus in the system.
+  int   num_nodes_;              // Number of nodes in the system.
+  int   num_cpus_per_node_;      // Number of cpus per node in the system.
+
+  time_t time_initialized_;      // Start time of test.
 
   vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
   vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.
@@ -263,7 +272,8 @@ class OsLayer {
   DISALLOW_COPY_AND_ASSIGN(OsLayer);
 };
 
-// Selects and returns the proper OS and hardware interface.
+// Selects and returns the proper OS and hardware interface.  Does not call
+// OsLayer::Initialize() on the new object.
 OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);
 
 #endif  // STRESSAPPTEST_OS_H_ NOLINT
diff --git a/src/os_factory.cc b/src/os_factory.cc
index 8acf573..359f7ee 100644
--- a/src/os_factory.cc
+++ b/src/os_factory.cc
@@ -36,6 +36,5 @@ OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options) {
     logprintf(0, "Process Error: Can't allocate memory\n");
     return 0;
   }
-  os->Initialize();
   return os;
 }
diff --git a/src/sat.cc b/src/sat.cc
index e714ba2..06b4c65 100644
--- a/src/sat.cc
+++ b/src/sat.cc
@@ -582,12 +582,24 @@ bool Sat::Initialize() {
     bad_status();
     return false;
   }
-  if (error_injection_) os_->set_error_injection(true);
+
+  if (min_hugepages_mbytes_ > 0)
+    os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte);
+
+  if (!os_->Initialize()) {
+    logprintf(0, "Process Error: Failed to initialize OS layer\n");
+    bad_status();
+    delete os_;
+    return false;
+  }
 
   // Checks that OS/Build/Platform is supported.
   if (!CheckEnvironment())
     return false;
 
+  if (error_injection_)
+    os_->set_error_injection(true);
+
   // Run SAT in monitor only mode, do not continue to allocate resources.
   if (monitor_mode_) {
     logprintf(5, "Log: Running in monitor-only mode. "
@@ -641,6 +653,7 @@ Sat::Sat() {
   pages_ = 0;
   size_mb_ = 0;
   size_ = size_mb_ * kMegabyte;
+  min_hugepages_mbytes_ = 0;
   freepages_ = 0;
   paddr_base_ = 0;
 
@@ -765,6 +778,9 @@ bool Sat::ParseArgs(int argc, char **argv) {
     // Set number of megabyte to use.
     ARG_IVALUE("-M", size_mb_);
 
+    // Set minimum megabytes of hugepages to require.
+    ARG_IVALUE("-H", min_hugepages_mbytes_);
+
     // Set number of seconds to run.
     ARG_IVALUE("-s", runtime_seconds_);
 
@@ -972,6 +988,7 @@ bool Sat::ParseArgs(int argc, char **argv) {
 void Sat::PrintHelp() {
   printf("Usage: ./sat(32|64) [options]\n"
          " -M mbytes        megabytes of ram to test\n"
+         " -H mbytes        minimum megabytes of hugepages to require\n"
          " -s seconds       number of seconds to run\n"
          " -m threads       number of memory copy threads to run\n"
          " -i threads       number of memory invert threads to run\n"
@@ -1037,20 +1054,6 @@ void Sat::GoogleOsOptions(std::map<std::string, std::string> *options) {
   // Do nothing, no OS-specific argument on public stressapptest
 }
 
-namespace {
-  // This counts the bits set in a bitmask.
-  // This is used to determine number of cores in an available mask.
-  int countbits(uint32 bitfield) {
-    int numbits = 0;
-    for (int i = 0; i < 32; i++) {
-      if (bitfield & (1 << i)) {
-        numbits++;
-      }
-    }
-    return numbits;
-  }
-}
-
 // Launch the SAT task threads. Returns 0 on error.
 void Sat::InitializeThreads() {
   // Memory copy threads.
@@ -1090,18 +1093,19 @@ void Sat::InitializeThreads() {
       int32 region = region_find(i % region_count_);
       cpu_set_t *cpuset = os_->FindCoreMask(region);
       sat_assert(cpuset);
-      int32 cpu_mask = cpuset_to_uint32(cpuset);
       if (region_mode_ == kLocalNuma) {
         // Choose regions associated with this CPU.
-        thread->set_cpu_mask(cpu_mask);
+        thread->set_cpu_mask(cpuset);
         thread->set_tag(1 << region);
       } else if (region_mode_ == kRemoteNuma) {
         // Choose regions not associated with this CPU..
-        thread->set_cpu_mask(cpu_mask);
+        thread->set_cpu_mask(cpuset);
         thread->set_tag(region_mask_ & ~(1 << region));
       }
     } else {
-      int cores = countbits(thread->AvailableCpus());
+      cpu_set_t available_cpus;
+      thread->AvailableCpus(&available_cpus);
+      int cores = cpuset_count(&available_cpus);
       // Don't restrict thread location if we have more than one
       // thread per core. Not so good for performance.
       if (cpu_stress_threads_ + memory_threads_ <= cores) {
@@ -1110,15 +1114,18 @@ void Sat::InitializeThreads() {
         int nthcore = i;
         int nthbit = (((2 * nthcore) % cores) +
                       (((2 * nthcore) / cores) % 2)) % cores;
-        if (thread->AvailableCpus() != ((1 << cores) - 1)) {
+        cpu_set_t all_cores;
+        cpuset_set_ab(&all_cores, 0, cores);
+        if (!cpuset_isequal(&available_cpus, &all_cores)) {
           // We are assuming the bits are contiguous.
           // Complain if this is not so.
-          logprintf(0, "Log: cores = %x, expected %x\n",
-                    thread->AvailableCpus(), ((1 << (cores + 1)) - 1));
+          logprintf(0, "Log: cores = %s, expected %s\n",
+                    cpuset_format(&available_cpus).c_str(),
+                    cpuset_format(&all_cores).c_str());
         }
 
         // Set thread affinity.
-        thread->set_cpu_mask(1 << nthbit);
+        thread->set_cpu_mask_to_cpu(nthbit);
       }
     }
     memory_vector->insert(memory_vector->end(), thread);
@@ -1238,7 +1245,9 @@ void Sat::InitializeThreads() {
 
     // Don't restrict thread location if we have more than one
     // thread per core. Not so good for performance.
-    int cores = countbits(thread->AvailableCpus());
+    cpu_set_t available_cpus;
+    thread->AvailableCpus(&available_cpus);
+    int cores = cpuset_count(&available_cpus);
     if (cpu_stress_threads_ + memory_threads_ <= cores) {
       // Place a thread on alternating cores first.
       // Go in reverse order for CPU stress threads. This assures interleaved
@@ -1246,13 +1255,16 @@ void Sat::InitializeThreads() {
       int nthcore = (cores - 1) - i;
       int nthbit = (((2 * nthcore) % cores) +
                     (((2 * nthcore) / cores) % 2)) % cores;
-      if (thread->AvailableCpus() != ((1 << cores) - 1)) {
-        logprintf(0, "Log: cores = %x, expected %x\n",
-                  thread->AvailableCpus(), ((1 << (cores + 1)) - 1));
+      cpu_set_t all_cores;
+      cpuset_set_ab(&all_cores, 0, cores);
+      if (!cpuset_isequal(&available_cpus, &all_cores)) {
+        logprintf(0, "Log: cores = %s, expected %s\n",
+                  cpuset_format(&available_cpus).c_str(),
+                  cpuset_format(&all_cores).c_str());
       }
 
       // Set thread affinity.
-      thread->set_cpu_mask(1 << nthbit);
+      thread->set_cpu_mask_to_cpu(nthbit);
     }
 
 
@@ -1298,7 +1310,7 @@ void Sat::InitializeThreads() {
       thread->InitThread(total_threads_++, this, os_, patternlist_,
                          &continuous_status_);
       // Pin the thread to a particular core.
-      thread->set_cpu_mask(1 << tnum);
+      thread->set_cpu_mask_to_cpu(tnum);
 
       // Insert the thread into the vector.
       cc_vector->insert(cc_vector->end(), thread);
diff --git a/src/sat.h b/src/sat.h
index b1ad085..950270f 100644
--- a/src/sat.h
+++ b/src/sat.h
@@ -147,6 +147,7 @@ class Sat {
   int64 pages_;                       // Number of memory blocks.
   int64 size_;                        // Size of memory tested, in bytes.
   int64 size_mb_;                     // Size of memory tested, in MB.
+  int64 min_hugepages_mbytes_;        // Minimum hugepages size.
   int64 freepages_;                   // How many invalid pages we need.
   int disk_pages_;                    // Number of pages per temp file.
   uint64 paddr_base_;                 // Physical address base.
diff --git a/src/sattypes.h b/src/sattypes.h
index 47fa79f..96bf13b 100644
--- a/src/sattypes.h
+++ b/src/sattypes.h
@@ -22,6 +22,7 @@
 #include <sys/types.h>
 #include <time.h>
 #include <string.h>
+#include <algorithm>
 #include <string>
 
 #ifdef HAVE_CONFIG_H  // Built using autoconf
@@ -86,36 +87,71 @@ void logprintf(int priority, const char *format, ...);
   // Note: this code is hacked together to deal with difference
   // function signatures across versions of glibc, ie those that take
   // cpu_set_t versus those that take unsigned long.  -johnhuang
-  typedef unsigned long cpu_set_t;
-  #define CPU_SETSIZE                   32
-  #define CPU_ISSET(index, cpu_set_ptr) (*(cpu_set_ptr) & 1 << (index))
-  #define CPU_SET(index, cpu_set_ptr)   (*(cpu_set_ptr) |= 1 << (index))
+  typedef uint64 cpu_set_t;
+  #define CPU_SETSIZE                   static_cast<int>(sizeof(cpu_set_t) * 8)
+  #define CPU_ISSET(index, cpu_set_ptr) (*(cpu_set_ptr) & 1ull << (index))
+  #define CPU_SET(index, cpu_set_ptr)   (*(cpu_set_ptr) |= 1ull << (index))
   #define CPU_ZERO(cpu_set_ptr)         (*(cpu_set_ptr) = 0)
-  #define CPU_CLR(index, cpu_set_ptr)   (*(cpu_set_ptr) &= ~(1 << (index)))
+  #define CPU_CLR(index, cpu_set_ptr)   (*(cpu_set_ptr) &= ~(1ull << (index)))
 #endif
 
-// Make using CPUSET non-super-painful.
-static inline uint32 cpuset_to_uint32(cpu_set_t *cpuset) {
-  uint32 value = 0;
-  for (int index = 0; index < CPU_SETSIZE; index++) {
-    if (CPU_ISSET(index, cpuset)) {
-      if (index < 32) {
-          value |= 1 << index;
-      } else {
-        logprintf(0, "Process Error: Cpu index (%d) higher than 32\n", index);
-        sat_assert(0);
-      }
-    }
-  }
-  return value;
+// Returns true when both sets contain exactly the same CPUs.
+static inline bool cpuset_isequal(const cpu_set_t *c1, const cpu_set_t *c2) {
+  for (int i = 0; i < CPU_SETSIZE; ++i)
+    if ((CPU_ISSET(i, c1) != 0) != (CPU_ISSET(i, c2) != 0))
+      return false;
+  return true;
+}
+
+// Returns true when every CPU present in c1 is also present in c2.
+static inline bool cpuset_issubset(const cpu_set_t *c1, const cpu_set_t *c2) {
+  for (int i = 0; i < CPU_SETSIZE; ++i)
+    if (CPU_ISSET(i, c1) && !CPU_ISSET(i, c2))
+      return false;
+  return true;
+}
+
+// Returns the number of CPUs present in the set.
+static inline int cpuset_count(const cpu_set_t *cpuset) {
+  int count = 0;
+  for (int i = 0; i < CPU_SETSIZE; ++i)
+    if (CPU_ISSET(i, cpuset))
+      ++count;
+  return count;
 }
 
-static inline void cpuset_from_uint32(uint32 mask, cpu_set_t *cpuset) {
+// Resets the set so it holds exactly CPUs a through b - 1 (half-open range).
+static inline void cpuset_set_ab(cpu_set_t *cpuset, int a, int b) {
   CPU_ZERO(cpuset);
-  for (int index = 0; index < 32; index++) {
-    if (mask & (1 << index))
-      CPU_SET(index, cpuset);
+  for (int i = a; i < b; ++i)
+    CPU_SET(i, cpuset);
+}
+
+// Formats the set as an uppercase hex string, most significant digit first,
+// with leading zero digits stripped; an empty set is formatted as "0".
+static inline string cpuset_format(const cpu_set_t *cpuset) {
+  string format;
+  int digit = 0, last_non_zero_size = 1;
+  for (int i = 0; i < CPU_SETSIZE; ++i) {
+    if (CPU_ISSET(i, cpuset)) {
+      digit |= 1 << (i & 3);
+    }
+    if ((i & 3) == 3) {
+      format += char(digit <= 9 ? '0' + digit: 'A' + digit - 10);
+      if (digit) {
+        last_non_zero_size = format.size();
+        digit = 0;
+      }
+    }
+  }
+  if (digit) {
+    format += char(digit <= 9 ? '0' + digit: 'A' + digit - 10);
+    last_non_zero_size = format.size();
   }
+  // Digits were built low-order first: trim high-order zeros, then reverse.
+  format.erase(last_non_zero_size);
+  reverse(format.begin(), format.end());
+  return format;
 }
 
 static const int32 kUSleepOneSecond = 1000000;
diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in
index 7680a29..535bb34 100644
--- a/src/stressapptest_config.h.in
+++ b/src/stressapptest_config.h.in
@@ -26,6 +26,9 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
+/* Define to 1 if you have the <libaio.h> header file. */
+#undef HAVE_LIBAIO_H
+
 /* Define to 1 if you have the <malloc.h> header file. */
 #undef HAVE_MALLOC_H
 
@@ -41,6 +44,9 @@
 /* Define to 1 if you have the <netdb.h> header file. */
 #undef HAVE_NETDB_H
 
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
 /* Define to 1 if you have the `select' function. */
 #undef HAVE_SELECT
 
diff --git a/src/worker.cc b/src/worker.cc
index 08b5a4e..c568064 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -44,7 +44,7 @@
 #include <sys/ioctl.h>
 #include <linux/fs.h>
 // For asynchronous I/O
-#include <linux/aio_abi.h>
+#include <libaio.h>
 
 #include <sys/syscall.h>
 
@@ -77,25 +77,9 @@ _syscall3(int, sched_setaffinity, pid_t, pid,
 
 // Linux aio syscalls.
 #if !defined(__NR_io_setup)
-#define __NR_io_setup   206
-#define __NR_io_destroy 207
-#define __NR_io_getevents       208
-#define __NR_io_submit  209
-#define __NR_io_cancel  210
+#error "No aio headers included, please install libaio."
 #endif
 
-#define io_setup(nr_events, ctxp) \
-  syscall(__NR_io_setup, (nr_events), (ctxp))
-#define io_submit(ctx_id, nr, iocbpp) \
-  syscall(__NR_io_submit, (ctx_id), (nr), (iocbpp))
-#define io_getevents(ctx_id, io_getevents, nr, events, timeout) \
-  syscall(__NR_io_getevents, (ctx_id), (io_getevents), (nr), (events), \
-    (timeout))
-#define io_cancel(ctx_id, iocb, result) \
-  syscall(__NR_io_cancel, (ctx_id), (iocb), (result))
-#define io_destroy(ctx) \
-  syscall(__NR_io_destroy, (ctx))
-
 namespace {
   // Get HW core ID from cpuid instruction.
   inline int apicid(void) {
@@ -157,7 +141,6 @@ static void *ThreadSpawnerGeneric(void *ptr) {
   return NULL;
 }
 
-
 void WorkerStatus::Initialize() {
   sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
   sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
@@ -245,10 +228,10 @@ void WorkerStatus::RemoveSelf() {
 
 // Parent thread class.
 WorkerThread::WorkerThread() {
-  status_ = 0;
+  status_ = false;
   pages_copied_ = 0;
   errorcount_ = 0;
-  runduration_usec_ = 0;
+  runduration_usec_ = 1;
   priority_ = Normal;
   worker_status_ = NULL;
   thread_spawner_ = &ThreadSpawnerGeneric;
@@ -310,7 +293,7 @@ void WorkerThread::InitThread(int thread_num_init,
   patternlist_ = patternlist_init;
   worker_status_ = worker_status;
 
-  cpu_mask_ = AvailableCpus();
+  AvailableCpus(&cpu_mask_);
   tag_ = 0xffffffff;
 
   tag_mode_ = sat_->tag_mode();
@@ -321,12 +304,15 @@ void WorkerThread::InitThread(int thread_num_init,
 bool WorkerThread::InitPriority() {
   // This doesn't affect performance that much, and may not be too safe.
 
-  bool ret = BindToCpus(cpu_mask_);
+  bool ret = BindToCpus(&cpu_mask_);
   if (!ret)
-    logprintf(11, "Log: Bind to %x failed.\n", cpu_mask_);
+    logprintf(11, "Log: Bind to %s failed.\n",
+              cpuset_format(&cpu_mask_).c_str());
 
-  logprintf(11, "Log: Thread %d running on apic ID %d mask %x (%x).\n",
-            thread_num_, apicid(), CurrentCpus(), cpu_mask_);
+  logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n",
+            thread_num_, apicid(),
+            CurrentCpusFormat().c_str(),
+            cpuset_format(&cpu_mask_).c_str());
 #if 0
   if (priority_ == High) {
     sched_param param;
@@ -356,7 +342,7 @@ int WorkerThread::SpawnThread() {
     logprintf(0, "Process Error: pthread_create "
                   "failed - error %d %s\n", result,
               buf);
-    status_ += 1;
+    status_ = false;
     return false;
   }
 
@@ -365,18 +351,17 @@ int WorkerThread::SpawnThread() {
 }
 
 // Kill the worker thread with SIGINT.
-int WorkerThread::KillThread() {
-  pthread_kill(thread_, SIGINT);
-  return 0;
+bool WorkerThread::KillThread() {
+  return (pthread_kill(thread_, SIGINT) == 0);
 }
 
 // Block until thread has exited.
-int WorkerThread::JoinThread() {
+bool WorkerThread::JoinThread() {
   int result = pthread_join(thread_, NULL);
 
   if (result) {
     logprintf(0, "Process Error: pthread_join failed - error %d\n", result);
-    status_ = 0;
+    status_ = false;
   }
 
   // 0 is pthreads success.
@@ -394,14 +379,14 @@ void WorkerThread::StartRoutine() {
 
 
 // Thread work loop. Execute until marked finished.
-int WorkerThread::Work() {
+bool WorkerThread::Work() {
   do {
     logprintf(9, "Log: ...\n");
     // Sleep for 1 second.
     sat_sleep(1);
   } while (IsReadyToRun());
 
-  return 0;
+  return false;
 }
 
 
@@ -409,11 +394,9 @@ int WorkerThread::Work() {
 // Conceptually, each bit represents a logical CPU, ie:
 //   mask = 3  (11b):   cpu0, 1
 //   mask = 13 (1101b): cpu0, 2, 3
-uint32 WorkerThread::AvailableCpus() {
-  cpu_set_t curr_cpus;
-  CPU_ZERO(&curr_cpus);
-  sched_getaffinity(getppid(), sizeof(curr_cpus), &curr_cpus);
-  return cpuset_to_uint32(&curr_cpus);
+bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) {
+  CPU_ZERO(cpuset);
+  return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0;
 }
 
 
@@ -421,11 +404,9 @@ uint32 WorkerThread::AvailableCpus() {
 // Conceptually, each bit represents a logical CPU, ie:
 //   mask = 3  (11b):   cpu0, 1
 //   mask = 13 (1101b): cpu0, 2, 3
-uint32 WorkerThread::CurrentCpus() {
-  cpu_set_t curr_cpus;
-  CPU_ZERO(&curr_cpus);
-  sched_getaffinity(0, sizeof(curr_cpus), &curr_cpus);
-  return cpuset_to_uint32(&curr_cpus);
+bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) {
+  CPU_ZERO(cpuset);
+  return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0;
 }
 
 
@@ -437,21 +418,22 @@ uint32 WorkerThread::CurrentCpus() {
 //                  mask = 13 (1101b): cpu0, 2, 3
 //
 //   Returns true on success, false otherwise.
-bool WorkerThread::BindToCpus(uint32 thread_mask) {
-  uint32 process_mask = AvailableCpus();
-  if (thread_mask == process_mask)
+bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) {
+  cpu_set_t process_mask;
+  AvailableCpus(&process_mask);
+  if (cpuset_isequal(thread_mask, &process_mask))
     return true;
 
-  logprintf(11, "Log: available CPU mask - %x\n", process_mask);
-  if ((thread_mask | process_mask) != process_mask) {
+  logprintf(11, "Log: available CPU mask - %s\n",
+            cpuset_format(&process_mask).c_str());
+  if (!cpuset_issubset(thread_mask, &process_mask)) {
     // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist.
-    logprintf(0, "Log: requested CPUs %x not a subset of available %x\n",
-              thread_mask, process_mask);
+    logprintf(0, "Log: requested CPUs %s not a subset of available %s\n",
+              cpuset_format(thread_mask).c_str(),
+              cpuset_format(&process_mask).c_str());
     return false;
   }
-  cpu_set_t cpuset;
-  cpuset_from_uint32(thread_mask, &cpuset);
-  return (sched_setaffinity(gettid(), sizeof(cpuset), &cpuset) == 0);
+  return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0);
 }
 
 
@@ -533,8 +515,8 @@ bool FillThread::FillPageRandom(struct page_entry *pe) {
 
 
 // Memory fill work loop. Execute until alloted pages filled.
-int FillThread::Work() {
-  int result = 1;
+bool FillThread::Work() {
+  bool result = true;
 
   logprintf(9, "Log: Starting fill thread %d\n", thread_num_);
 
@@ -544,7 +526,7 @@ int FillThread::Work() {
   struct page_entry pe;
   int64 loops = 0;
   while (IsReadyToRun() && (loops < num_pages_to_fill_)) {
-    result &= sat_->GetEmpty(&pe);
+    result = result && sat_->GetEmpty(&pe);
     if (!result) {
       logprintf(0, "Process Error: fill_thread failed to pop pages, "
                 "bailing\n");
@@ -552,11 +534,11 @@ int FillThread::Work() {
     }
 
     // Fill the page with pattern
-    result &= FillPageRandom(&pe);
+    result = result && FillPageRandom(&pe);
     if (!result) break;
 
     // Put the page back on the queue.
-    result &= sat_->PutValid(&pe);
+    result = result && sat_->PutValid(&pe);
     if (!result) {
       logprintf(0, "Process Error: fill_thread failed to push pages, "
                 "bailing\n");
@@ -570,7 +552,7 @@ int FillThread::Work() {
   status_ = result;
   logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n",
             thread_num_, status_, pages_copied_);
-  return 0;
+  return result;
 }
 
 
@@ -581,7 +563,6 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
   char dimm_string[256] = "";
 
   int apic_id = apicid();
-  uint32 cpumask = CurrentCpus();
 
   // Determine if this is a write or read error.
   os_->Flush(error->vaddr);
@@ -613,11 +594,11 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
                                               (error->vaddr), 1);
 
     logprintf(priority,
-              "%s: miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
+              "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
               "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
               message,
               apic_id,
-              cpumask,
+              CurrentCpusFormat().c_str(),
               error->vaddr,
               error->paddr,
               dimm_string,
@@ -950,7 +931,6 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
   bool read_error = false;
 
   int apic_id = apicid();
-  uint32 cpumask = CurrentCpus();
 
   // Determine if this is a write or read error.
   os_->Flush(error->vaddr);
@@ -978,14 +958,14 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
   if (priority < 5) {
     logprintf(priority,
               "%s: Tag from %p(0x%llx:%s) (%s) "
-              "miscompare on CPU %d(0x%x) at %p(0x%llx:%s): "
+              "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
               "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
               message,
               error->tagvaddr, error->tagpaddr,
               tag_dimm_string,
               read_error ? "read error" : "write error",
               apic_id,
-              cpumask,
+              CurrentCpusFormat().c_str(),
               error->vaddr,
               error->paddr,
               dimm_string,
@@ -1090,6 +1070,46 @@ bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64,
   return true;
 }
 
+// x86_64 SSE2 assembly implementation of Adler memory copy, with address
+// tagging added as a second step. This is useful for debugging failures
+// that only occur when SSE / nontemporal writes are used.
+bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
+                                       uint64 *srcmem64,
+                                       unsigned int size_in_bytes,
+                                       AdlerChecksum *checksum,
+                                       struct page_entry *pe) {
+  // Copy via the OS routine; its checksum lands in a local and is discarded.
+  AdlerChecksum ignored_checksum;
+  os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);
+
+  // Flush both buffers; i is a word index stepping sizeof(uint64) words.
+  int length = size_in_bytes / sizeof(*dstmem64);
+  for (int i = 0; i < length; i += sizeof(*dstmem64)) {
+    os_->FastFlush(dstmem64 + i);
+    os_->FastFlush(srcmem64 + i);
+  }
+  // Checksum the source into *checksum; the returned bool is not checked.
+  AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
+  // Rewrite the destination's address tags; this result is ignored too.
+  TagAddrC(dstmem64, size_in_bytes);
+  return true;
+}
+
+// Retag a block: every 8th word gets addr_to_tag(&word). Always true.
+bool WorkerThread::TagAddrC(uint64 *memwords,
+                            unsigned int size_in_bytes) {
+  // length is the word count (size_in_bytes / wordsize_). Only indexes
+  // 0, 8, 16, ... are rewritten; every other word is left untouched.
+
+  // Overwrite each tag slot with the tag addr_to_tag builds from its address.
+  int length = size_in_bytes / wordsize_;
+  for (int i = 0; i < length; i += 8) {
+    datacast_t data;
+    data.l64 = addr_to_tag(&memwords[i]);
+    memwords[i] = data.l64;
+  }
+  return true;
+}
 
 // C implementation of Adler memory crc.
 bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
@@ -1122,15 +1142,12 @@ bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
       if (data.l64 != src_tag)
         ReportTagError(&srcmem64[i], data.l64, src_tag);
 
-
       data.l32.l = pattern->pattern(i << 1);
       data.l32.h = pattern->pattern((i << 1) + 1);
       a1 = a1 + data.l32.l;
       b1 = b1 + a1;
       a1 = a1 + data.l32.h;
       b1 = b1 + a1;
-
-
     } else {
       data.l64 = srcmem64[i];
       a1 = a1 + data.l32.l;
@@ -1202,11 +1219,10 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
                                    currentblock * blocksize, 0);
           if (errorcount == 0) {
             int apic_id = apicid();
-            uint32 cpumask = CurrentCpus();
-            logprintf(0, "Process Error: CPU %d(0x%x) CrcCopyPage "
+            logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
                          "CRC mismatch %s != %s, "
                          "but no miscompares found on second pass.\n",
-                      apic_id, cpumask,
+                      apic_id, CurrentCpusFormat().c_str(),
                       crc.ToHexString().c_str(),
                       expectedcrc->ToHexString().c_str());
             struct ErrorRecord er;
@@ -1317,7 +1333,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
 
     AdlerChecksum crc;
     if (tag_mode_) {
-      AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
+      AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe);
     } else {
       os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc);
     }
@@ -1346,10 +1362,18 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                    blocksize,
                                    currentblock * blocksize, 0);
           if (errorcount == 0) {
-            logprintf(0, "Process Error: CrcWarmCopyPage CRC mismatch %s "
-                         "!= %s, but no miscompares found on second pass.\n",
+            int apic_id = apicid();
+            logprintf(0, "Process Error: CPU %d(0x%s) CrcWarmCopyPage "
+                         "CRC mismatch %s != %s, "
+                         "but no miscompares found on second pass.\n",
+                      apic_id, CurrentCpusFormat().c_str(),
                       crc.ToHexString().c_str(),
                       expectedcrc->ToHexString().c_str());
+            struct ErrorRecord er;
+            er.actual = sourcemem[0];
+            er.expected = 0x0;
+            er.vaddr = sourcemem;
+            ProcessError(&er, 0, "Hardware Error");
           }
         }
       }
@@ -1388,23 +1412,23 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
 
 
 // Memory check work loop. Execute until done, then exhaust pages.
-int CheckThread::Work() {
+bool CheckThread::Work() {
   struct page_entry pe;
-  int result = 1;
+  bool result = true;
   int64 loops = 0;
 
   logprintf(9, "Log: Starting Check thread %d\n", thread_num_);
 
   // We want to check all the pages, and
   // stop when there aren't any left.
-  while (1) {
-    result &= sat_->GetValid(&pe);
+  while (true) {
+    result = result && sat_->GetValid(&pe);
     if (!result) {
       if (IsReadyToRunNoPause())
         logprintf(0, "Process Error: check_thread failed to pop pages, "
                   "bailing\n");
       else
-        result = 1;
+        result = true;
       break;
     }
 
@@ -1414,9 +1438,9 @@ int CheckThread::Work() {
     // Push pages back on the valid queue if we are still going,
     // throw them out otherwise.
     if (IsReadyToRunNoPause())
-      result &= sat_->PutValid(&pe);
+      result = result && sat_->PutValid(&pe);
     else
-      result &= sat_->PutEmpty(&pe);
+      result = result && sat_->PutEmpty(&pe);
     if (!result) {
       logprintf(0, "Process Error: check_thread failed to push pages, "
                 "bailing\n");
@@ -1429,24 +1453,24 @@ int CheckThread::Work() {
   status_ = result;
   logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 
 // Memory copy work loop. Execute until marked done.
-int CopyThread::Work() {
+bool CopyThread::Work() {
   struct page_entry src;
   struct page_entry dst;
-  int result = 1;
+  bool result = true;
   int64 loops = 0;
 
-  logprintf(9, "Log: Starting copy thread %d: cpu %x, mem %x\n",
-            thread_num_, cpu_mask_, tag_);
+  logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n",
+            thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_);
 
   while (IsReadyToRun()) {
     // Pop the needed pages.
-    result &= sat_->GetValid(&src, tag_);
-    result &= sat_->GetEmpty(&dst, tag_);
+    result = result && sat_->GetValid(&src, tag_);
+    result = result && sat_->GetEmpty(&dst, tag_);
     if (!result) {
       logprintf(0, "Process Error: copy_thread failed to pop pages, "
                 "bailing\n");
@@ -1472,8 +1496,8 @@ int CopyThread::Work() {
       dst.pattern = src.pattern;
     }
 
-    result &= sat_->PutValid(&dst);
-    result &= sat_->PutEmpty(&src);
+    result = result && sat_->PutValid(&dst);
+    result = result && sat_->PutEmpty(&src);
 
     // Copy worker-threads yield themselves at the end of each copy loop,
     // to avoid threads from preempting each other in the middle of the inner
@@ -1494,20 +1518,20 @@ int CopyThread::Work() {
   status_ = result;
   logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 // Memory invert work loop. Execute until marked done.
-int InvertThread::Work() {
+bool InvertThread::Work() {
   struct page_entry src;
-  int result = 1;
+  bool result = true;
   int64 loops = 0;
 
   logprintf(9, "Log: Starting invert thread %d\n", thread_num_);
 
   while (IsReadyToRun()) {
     // Pop the needed pages.
-    result &= sat_->GetValid(&src);
+    result = result && sat_->GetValid(&src);
     if (!result) {
       logprintf(0, "Process Error: invert_thread failed to pop pages, "
                 "bailing\n");
@@ -1533,7 +1557,7 @@ int InvertThread::Work() {
     if (sat_->strict())
       CrcCheckPage(&src);
 
-    result &= sat_->PutValid(&src);
+    result = result && sat_->PutValid(&src);
     if (!result) {
       logprintf(0, "Process Error: invert_thread failed to push pages, "
                 "bailing\n");
@@ -1546,7 +1570,7 @@ int InvertThread::Work() {
   status_ = result;
   logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 
@@ -1565,17 +1589,16 @@ bool FileThread::OpenFile(int *pfile) {
     logprintf(0, "Process Error: Failed to create file %s!!\n",
               filename_.c_str());
     pages_copied_ = 0;
-    status_ = 0;
-    return 0;
+    return false;
   }
   *pfile = fd;
-  return 1;
+  return true;
 }
 
 // Close the file.
 bool FileThread::CloseFile(int fd) {
   close(fd);
-  return 1;
+  return true;
 }
 
 // Check sector tagging.
@@ -1615,7 +1638,7 @@ bool FileThread::WritePages(int fd) {
   int strict = sat_->strict();
 
   // Start fresh at beginning of file for each batch of pages.
-  lseek(fd, 0, SEEK_SET);
+  lseek64(fd, 0, SEEK_SET);
   for (int i = 0; i < sat_->disk_pages(); i++) {
     struct page_entry src;
     if (!GetValidPage(&src))
@@ -1770,7 +1793,7 @@ bool FileThread::PagePrepare() {
       logprintf(0, "Process Error: disk thread posix_memalign "
                    "returned %d (fail)\n",
                 result);
-      status_ += 1;
+      status_ = false;
       return false;
     }
   }
@@ -1839,17 +1862,14 @@ bool FileThread::PutValidPage(struct page_entry *src) {
   return true;
 }
 
-
-
 // Copy data from file into memory blocks.
 bool FileThread::ReadPages(int fd) {
   int page_length = sat_->page_length();
   int strict = sat_->strict();
-  int result = 1;
-
+  bool result = true;
 
   // Read our data back out of the file, into it's new location.
-  lseek(fd, 0, SEEK_SET);
+  lseek64(fd, 0, SEEK_SET);
   for (int i = 0; i < sat_->disk_pages(); i++) {
     struct page_entry dst;
     if (!GetEmptyPage(&dst))
@@ -1888,11 +1908,9 @@ bool FileThread::ReadPages(int fd) {
   return result;
 }
 
-
 // File IO work loop. Execute until marked done.
-int FileThread::Work() {
-  int result = 1;
-  int fileresult = 1;
+bool FileThread::Work() {
+  bool result = true;
   int64 loops = 0;
 
   logprintf(9, "Log: Starting file thread %d, file %s, device %s\n",
@@ -1900,13 +1918,17 @@ int FileThread::Work() {
             filename_.c_str(),
             devicename_.c_str());
 
-  if (!PagePrepare())
-    return 0;
+  if (!PagePrepare()) {
+    status_ = false;
+    return false;
+  }
 
   // Open the data IO file.
   int fd = 0;
-  if (!OpenFile(&fd))
-    return 0;
+  if (!OpenFile(&fd)) {
+    status_ = false;
+    return false;
+  }
 
   pass_ = 0;
 
@@ -1919,11 +1941,11 @@ int FileThread::Work() {
   // Loop until done.
   while (IsReadyToRun()) {
     // Do the file write.
-    if (!(fileresult &= WritePages(fd)))
+    if (!(result = result && WritePages(fd)))
       break;
 
     // Do the file read.
-    if (!(fileresult &= ReadPages(fd)))
+    if (!(result = result && ReadPages(fd)))
       break;
 
     loops++;
@@ -1939,7 +1961,7 @@ int FileThread::Work() {
 
   logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 bool NetworkThread::IsNetworkStopSet() {
@@ -1965,7 +1987,7 @@ bool NetworkThread::CreateSocket(int *psocket) {
   if (sock == -1) {
     logprintf(0, "Process Error: Cannot open socket\n");
     pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
     return false;
   }
   *psocket = sock;
@@ -1989,7 +2011,7 @@ bool NetworkThread::Connect(int sock) {
   if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) {
     logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_);
     pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
     return false;
   }
 
@@ -1997,7 +2019,7 @@ bool NetworkThread::Connect(int sock) {
                     sizeof(struct sockaddr))) {
     logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_);
     pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
     return false;
   }
   return true;
@@ -2018,7 +2040,7 @@ bool NetworkListenThread::Listen() {
     sat_strerror(errno, buf, sizeof(buf));
     logprintf(0, "Process Error: Cannot bind socket: %s\n", buf);
     pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
     return false;
   }
   listen(sock_, 3);
@@ -2052,13 +2074,14 @@ bool NetworkListenThread::GetConnection(int *pnewsock) {
   if (newsock < 0)  {
     logprintf(0, "Process Error: Did not receive connection\n");
     pages_copied_ = 0;
-    status_ = 0;
+    status_ = false;
     return false;
   }
   *pnewsock = newsock;
   return true;
 }
 
+// Send a page, return false if a page was not sent.
 bool NetworkThread::SendPage(int sock, struct page_entry *src) {
   int page_length = sat_->page_length();
   char *address = static_cast<char*>(src->addr);
@@ -2074,6 +2097,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) {
         logprintf(0, "Process Error: Thread %d, "
                      "Network write failed, bailing. (%s)\n",
                   thread_num_, buf);
+        status_ = false;
       }
       return false;
     }
@@ -2082,7 +2106,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) {
   return true;
 }
 
-
+// Receive a page. Return false if a page was not received.
 bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
   int page_length = sat_->page_length();
   char *address = static_cast<char*>(dst->addr);
@@ -2107,6 +2131,7 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
           logprintf(0, "Process Error: Thread %d, "
                        "Network read failed, bailing (%s).\n",
                     thread_num_, buf);
+          status_ = false;
           // Print arguments and results.
           logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n",
                     sock, address + (page_length - size),
@@ -2129,9 +2154,9 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
   return true;
 }
 
-
 // Network IO work loop. Execute until marked done.
-int NetworkThread::Work() {
+// Return true if the thread ran as expected.
+bool NetworkThread::Work() {
   logprintf(9, "Log: Starting network thread %d, ip %s\n",
             thread_num_,
             ipaddr_);
@@ -2139,7 +2164,7 @@ int NetworkThread::Work() {
   // Make a socket.
   int sock = 0;
   if (!CreateSocket(&sock))
-    return 0;
+    return false;
 
   // Network IO loop requires network slave thread to have already initialized.
   // We will sleep here for awhile to ensure that the slave thread will be
@@ -2153,17 +2178,17 @@ int NetworkThread::Work() {
 
   // Connect to a slave thread.
   if (!Connect(sock))
-    return 0;
+    return false;
 
   // Loop until done.
-  int result = 1;
+  bool result = true;
   int strict = sat_->strict();
   int64 loops = 0;
   while (IsReadyToRun()) {
     struct page_entry src;
     struct page_entry dst;
-    result &= sat_->GetValid(&src);
-    result &= sat_->GetEmpty(&dst);
+    result = result && sat_->GetValid(&src);
+    result = result && sat_->GetEmpty(&dst);
     if (!result) {
       logprintf(0, "Process Error: net_thread failed to pop pages, "
                 "bailing\n");
@@ -2175,14 +2200,14 @@ int NetworkThread::Work() {
       CrcCheckPage(&src);
 
     // Do the network write.
-    if (!(result &= SendPage(sock, &src)))
+    if (!(result = result && SendPage(sock, &src)))
       break;
 
     // Update pattern reference to reflect new contents.
     dst.pattern = src.pattern;
 
     // Do the network read.
-    if (!(result &= ReceivePage(sock, &dst)))
+    if (!(result = result && ReceivePage(sock, &dst)))
       break;
 
     // Ensure that the transfer ended up with correct data.
@@ -2190,8 +2215,8 @@ int NetworkThread::Work() {
       CrcCheckPage(&dst);
 
     // Return all of our pages to the queue.
-    result &= sat_->PutValid(&dst);
-    result &= sat_->PutEmpty(&src);
+    result = result && sat_->PutValid(&dst);
+    result = result && sat_->PutEmpty(&src);
     if (!result) {
       logprintf(0, "Process Error: net_thread failed to push pages, "
                 "bailing\n");
@@ -2209,7 +2234,7 @@ int NetworkThread::Work() {
   logprintf(9, "Log: Completed %d: network thread status %d, "
                "%d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 // Spawn slave threads for incoming connections.
@@ -2253,15 +2278,17 @@ bool NetworkListenThread::ReapSlaves() {
 }
 
 // Network listener IO work loop. Execute until marked done.
-int NetworkListenThread::Work() {
-  int result = 1;
+// Return false on fatal software error.
+bool NetworkListenThread::Work() {
   logprintf(9, "Log: Starting network listen thread %d\n",
             thread_num_);
 
   // Make a socket.
   sock_ = 0;
-  if (!CreateSocket(&sock_))
-    return 0;
+  if (!CreateSocket(&sock_)) {
+    status_ = false;
+    return false;
+  }
   logprintf(9, "Log: Listen thread created sock\n");
 
   // Allows incoming connections to be queued up by socket library.
@@ -2296,12 +2323,12 @@ int NetworkListenThread::Work() {
 
   CloseSocket(sock_);
 
-  status_ = result;
+  status_ = true;
   logprintf(9,
             "Log: Completed %d: network listen thread status %d, "
             "%d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return 1;
+  return true;
 }
 
 // Set network reflector socket struct.
@@ -2310,14 +2337,17 @@ void NetworkSlaveThread::SetSock(int sock) {
 }
 
 // Network reflector IO work loop. Execute until marked done.
-int NetworkSlaveThread::Work() {
+// Return false on fatal software error.
+bool NetworkSlaveThread::Work() {
   logprintf(9, "Log: Starting network slave thread %d\n",
             thread_num_);
 
   // Verify that we have a socket.
   int sock = sock_;
-  if (!sock)
-    return 0;
+  if (!sock) {
+    status_ = false;
+    return false;
+  }
 
   // Loop until done.
   int64 loops = 0;
@@ -2328,7 +2358,7 @@ int NetworkSlaveThread::Work() {
     logprintf(0, "Process Error: net slave posix_memalign "
                  "returned %d (fail)\n",
               result);
-    status_ += 1;
+    status_ = false;
     return false;
   }
 
@@ -2351,7 +2381,7 @@ int NetworkSlaveThread::Work() {
 
   pages_copied_ = loops;
   // No results provided from this type of thread.
-  status_ = 1;
+  status_ = true;
 
   // Clean up.
   CloseSocket(sock);
@@ -2360,11 +2390,11 @@ int NetworkSlaveThread::Work() {
             "Log: Completed %d: network slave thread status %d, "
             "%d pages copied\n",
             thread_num_, status_, pages_copied_);
-  return status_;
+  return true;
 }
 
 // Thread work loop. Execute until marked finished.
-int ErrorPollThread::Work() {
+bool ErrorPollThread::Work() {
   logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_);
 
   // This calls a generic error polling function in the Os abstraction layer.
@@ -2375,12 +2405,13 @@ int ErrorPollThread::Work() {
 
   logprintf(9, "Log: Finished system error poll thread %d: %d errors\n",
             thread_num_, errorcount_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
 }
 
 // Worker thread to heat up CPU.
-int CpuStressThread::Work() {
+// This thread does not evaluate pass/fail or software error.
+bool CpuStressThread::Work() {
   logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_);
 
   do {
@@ -2391,8 +2422,8 @@ int CpuStressThread::Work() {
 
   logprintf(9, "Log: Finished CPU stress thread %d:\n",
             thread_num_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
 }
 
 CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
@@ -2406,7 +2437,8 @@ CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
 }
 
 // Worked thread to test the cache coherency of the CPUs
-int CpuCacheCoherencyThread::Work() {
+// Return false on fatal sw error.
+bool CpuCacheCoherencyThread::Work() {
   logprintf(9, "Log: Starting the Cache Coherency thread %d\n",
             cc_thread_num_);
   uint64 time_start, time_end;
@@ -2459,8 +2491,8 @@ int CpuCacheCoherencyThread::Work() {
             cc_thread_num_, us_elapsed, total_inc, inc_rate);
   logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n",
             cc_thread_num_);
-  status_ = 1;
-  return 1;
+  status_ = true;
+  return true;
 }
 
 DiskThread::DiskThread(DiskBlockTable *block_table) {
@@ -2489,9 +2521,14 @@ DiskThread::DiskThread(DiskBlockTable *block_table) {
   update_block_table_ = 1;
 
   block_buffer_ = NULL;
+
+  blocks_written_ = 0;
+  blocks_read_ = 0;
 }
 
 DiskThread::~DiskThread() {
+  if (block_buffer_)
+    free(block_buffer_);
 }
 
 // Set filename for device file (in /dev).
@@ -2616,6 +2653,7 @@ bool DiskThread::SetParameters(int read_block_size,
   return true;
 }
 
+// Open a device, return false on failure.
 bool DiskThread::OpenDevice(int *pfile) {
   int fd = open(device_name_.c_str(),
                 O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE,
@@ -2631,6 +2669,7 @@ bool DiskThread::OpenDevice(int *pfile) {
 }
 
 // Retrieves the size (in bytes) of the disk/file.
+// Return false on failure.
 bool DiskThread::GetDiskSize(int fd) {
   struct stat device_stat;
   if (fstat(fd, &device_stat) == -1) {
@@ -2654,7 +2693,7 @@ bool DiskThread::GetDiskSize(int fd) {
     if (block_size == 0) {
       os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1);
       ++errorcount_;
-      status_ = 1;  // Avoid a procedural error.
+      status_ = true;  // Avoid a procedural error.
       return false;
     }
 
@@ -2694,11 +2733,12 @@ int64 DiskThread::GetTime() {
   return tv.tv_sec * 1000000 + tv.tv_usec;
 }
 
+// Do randomized reads and (possibly) writes on a device.
+// Return false on fatal error, either SW or HW.
 bool DiskThread::DoWork(int fd) {
   int64 block_num = 0;
-  blocks_written_ = 0;
-  blocks_read_ = 0;
   int64 num_segments;
+  bool result = true;
 
   if (segment_size_ == -1) {
     num_segments = 1;
@@ -2731,13 +2771,15 @@ bool DiskThread::DoWork(int fd) {
 
   while (IsReadyToRun()) {
     // Write blocks to disk.
-    logprintf(16, "Write phase for disk %s (thread %d).\n",
+    logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n",
+              non_destructive_ ? "(disabled) " : "",
               device_name_.c_str(), thread_num_);
     while (IsReadyToRunNoPause() &&
            in_flight_sectors_.size() < queue_size_ + 1) {
       // Confine testing to a particular segment of the disk.
       int64 segment = (block_num / blocks_per_segment_) % num_segments;
-      if (block_num % blocks_per_segment_ == 0) {
+      if (!non_destructive_ &&
+          (block_num % blocks_per_segment_ == 0)) {
         logprintf(20, "Log: Starting to write segment %lld out of "
                   "%lld on disk %s (thread %d).\n",
                   segment, num_segments, device_name_.c_str(),
@@ -2768,33 +2810,37 @@ bool DiskThread::DoWork(int fd) {
       if (!non_destructive_) {
         if (!WriteBlockToDisk(fd, block)) {
           block_table_->RemoveBlock(block);
-          continue;
+          return false;
         }
+        blocks_written_++;
       }
 
+      // Block is either initialized by writing or, in the non-destructive
+      // case, by being added to the data structure for later reading.
       block->SetBlockAsInitialized();
 
-      blocks_written_++;
       in_flight_sectors_.push(block);
     }
 
     // Verify blocks on disk.
-    logprintf(20, "Read phase for disk %s (thread %d).\n",
+    logprintf(20, "Log: Read phase for disk %s (thread %d).\n",
               device_name_.c_str(), thread_num_);
     while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) {
       BlockData *block = in_flight_sectors_.front();
       in_flight_sectors_.pop();
-      ValidateBlockOnDisk(fd, block);
+      if (!ValidateBlockOnDisk(fd, block))
+        return false;
       block_table_->RemoveBlock(block);
       blocks_read_++;
     }
   }
 
   pages_copied_ = blocks_written_ + blocks_read_;
-  return true;
+  return result;
 }
 
 // Do an asynchronous disk I/O operation.
+// Return false if the IO is not set up.
 bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
                             int64 offset, int64 timeout) {
   // Use the Linux native asynchronous I/O interface for reading/writing.
@@ -2808,8 +2854,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
     const char *op_str;
     const char *error_str;
   } operations[2] = {
-    { IOCB_CMD_PREAD, "read", "disk-read-error" },
-    { IOCB_CMD_PWRITE, "write", "disk-write-error" }
+    { IO_CMD_PREAD, "read", "disk-read-error" },
+    { IO_CMD_PWRITE, "write", "disk-write-error" }
   };
 
   struct iocb cb;
@@ -2817,16 +2863,19 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
 
   cb.aio_fildes = fd;
   cb.aio_lio_opcode = operations[op].opcode;
-  cb.aio_buf = (__u64)buf;
-  cb.aio_nbytes = size;
-  cb.aio_offset = offset;
+  cb.u.c.buf = buf;
+  cb.u.c.nbytes = size;
+  cb.u.c.offset = offset;
 
   struct iocb *cbs[] = { &cb };
   if (io_submit(aio_ctx_, 1, cbs) != 1) {
+    int error = errno;
+    char buf[256];
+    sat_strerror(error, buf, sizeof(buf));
     logprintf(0, "Process Error: Unable to submit async %s "
-                 "on disk %s (thread %d).\n",
+                 "on disk %s (thread %d). Error %d, %s\n",
               operations[op].op_str, device_name_.c_str(),
-              thread_num_);
+              thread_num_, error, buf);
     return false;
   }
 
@@ -2839,7 +2888,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
     // A ctrl-c from the keyboard will cause io_getevents to fail with an
     // EINTR error code.  This is not an error and so don't treat it as such,
     // but still log it.
-    if (errno == EINTR) {
+    int error = errno;
+    if (error == EINTR) {
       logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n",
                 operations[op].op_str, device_name_.c_str(),
                 thread_num_);
@@ -2860,9 +2910,12 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
     io_destroy(aio_ctx_);
     aio_ctx_ = 0;
     if (io_setup(5, &aio_ctx_)) {
+      int error = errno;
+      char buf[256];
+      sat_strerror(error, buf, sizeof(buf));
       logprintf(0, "Process Error: Unable to create aio context on disk %s"
-                " (thread %d).\n",
-                device_name_.c_str(), thread_num_);
+                " (thread %d) Error %d, %s\n",
+                device_name_.c_str(), thread_num_, error, buf);
     }
 
     return false;
@@ -2901,6 +2954,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
 }
 
 // Write a block to disk.
+// Return false if the block is not written.
 bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
   memset(block_buffer_, 0, block->GetSize());
 
@@ -2951,6 +3005,8 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
 }
 
 // Verify a block on disk.
+// Return true if the block was read. Also increments errorcount_
+// if the block had data errors or performance problems.
 bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
   int64 blocks = block->GetSize() / read_block_size_;
   int64 bytes_read = 0;
@@ -2964,7 +3020,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
 
   // Read block from disk and time the read.  If it takes longer than the
   // threshold, complain.
-  if (lseek(fd, address * kSectorSize, SEEK_SET) == -1) {
+  if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) {
     logprintf(0, "Process Error: Unable to seek to sector %lld in "
               "DiskThread::ValidateSectorsOnDisk on disk %s "
               "(thread %d).\n", address, device_name_.c_str(), thread_num_);
@@ -2976,7 +3032,6 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
   // read them in groups of randomly-sized multiples of read block size.
   // This assures all data written on disk by this particular block
   // will be tested using a random reading pattern.
-
   while (blocks != 0) {
     // Test all read blocks in a written block.
     current_blocks = (random() % blocks) + 1;
@@ -3027,7 +3082,9 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
   return true;
 }
 
-int DiskThread::Work() {
+// Direct device access thread.
+// Return false on software error.
+bool DiskThread::Work() {
   int fd;
 
   logprintf(9, "Log: Starting disk thread %d, disk %s\n",
@@ -3036,42 +3093,43 @@ int DiskThread::Work() {
   srandom(time(NULL));
 
   if (!OpenDevice(&fd)) {
-    return 0;
+    status_ = false;
+    return false;
   }
 
   // Allocate a block buffer aligned to 512 bytes since the kernel requires it
   // when using direst IO.
-
-  int result = posix_memalign(&block_buffer_, kBufferAlignment,
+  int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
                               sat_->page_length());
-  if (result) {
+  if (memalign_result) {
     CloseDevice(fd);
     logprintf(0, "Process Error: Unable to allocate memory for buffers "
                  "for disk %s (thread %d) posix memalign returned %d.\n",
-              device_name_.c_str(), thread_num_, result);
-    status_ += 1;
+              device_name_.c_str(), thread_num_, memalign_result);
+    status_ = false;
     return false;
   }
 
   if (io_setup(5, &aio_ctx_)) {
+    CloseDevice(fd);
     logprintf(0, "Process Error: Unable to create aio context for disk %s"
               " (thread %d).\n",
               device_name_.c_str(), thread_num_);
-    return 0;
+    status_ = false;
+    return false;
   }
 
-  DoWork(fd);
+  bool result = DoWork(fd);
 
-  status_ = 1;
+  status_ = result;
 
   io_destroy(aio_ctx_);
   CloseDevice(fd);
-  free(block_buffer_);
 
   logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, "
                "%d pages copied\n",
             thread_num_, device_name_.c_str(), status_, pages_copied_);
-  return 1;
+  return result;
 }
 
 RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
@@ -3082,15 +3140,14 @@ RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
 RandomDiskThread::~RandomDiskThread() {
 }
 
+// Workload for random disk thread.
 bool RandomDiskThread::DoWork(int fd) {
-  blocks_read_ = 0;
-  blocks_written_ = 0;
-  logprintf(11, "Random phase for disk %s (thread %d).\n",
+  logprintf(11, "Log: Random phase for disk %s (thread %d).\n",
             device_name_.c_str(), thread_num_);
   while (IsReadyToRun()) {
     BlockData *block = block_table_->GetRandomBlock();
     if (block == NULL) {
-      logprintf(12, "No block available for device %s (thread %d).\n",
+      logprintf(12, "Log: No block available for device %s (thread %d).\n",
                 device_name_.c_str(), thread_num_);
     } else {
       ValidateBlockOnDisk(fd, block);
@@ -3112,6 +3169,8 @@ MemoryRegionThread::~MemoryRegionThread() {
     delete pages_;
 }
 
+// Set a region of memory or MMIO to be tested.
+// Return false if region could not be mapped.
 bool MemoryRegionThread::SetRegion(void *region, int64 size) {
   int plength = sat_->page_length();
   int npages = size / plength;
@@ -3137,6 +3196,8 @@ bool MemoryRegionThread::SetRegion(void *region, int64 size) {
   }
 }
 
+// More detailed error printout for hardware errors in memory or MMIO
+// regions.
 void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
                                       int priority,
                                       const char *message) {
@@ -3187,10 +3248,12 @@ void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
   }
 }
 
-int MemoryRegionThread::Work() {
+// Workload for testing memory or MMIO regions.
+// Return false on software error.
+bool MemoryRegionThread::Work() {
   struct page_entry source_pe;
   struct page_entry memregion_pe;
-  int result = 1;
+  bool result = true;
   int64 loops = 0;
   const uint64 error_constant = 0x00ba00000000ba00LL;
 
@@ -3204,14 +3267,14 @@ int MemoryRegionThread::Work() {
   while (IsReadyToRun()) {
     // Getting pages from SAT and queue.
     phase_ = kPhaseNoPhase;
-    result &= sat_->GetValid(&source_pe);
+    result = result && sat_->GetValid(&source_pe);
     if (!result) {
       logprintf(0, "Process Error: memory region thread failed to pop "
                 "pages from SAT, bailing\n");
       break;
     }
 
-    result &= pages_->PopRandom(&memregion_pe);
+    result = result && pages_->PopRandom(&memregion_pe);
     if (!result) {
       logprintf(0, "Process Error: memory region thread failed to pop "
                 "pages from queue, bailing\n");
@@ -3245,13 +3308,13 @@ int MemoryRegionThread::Work() {
 
     phase_ = kPhaseNoPhase;
     // Storing pages on their proper queues.
-    result &= sat_->PutValid(&source_pe);
+    result = result && sat_->PutValid(&source_pe);
     if (!result) {
       logprintf(0, "Process Error: memory region thread failed to push "
                 "pages into SAT, bailing\n");
       break;
     }
-    result &= pages_->Push(&memregion_pe);
+    result = result && pages_->Push(&memregion_pe);
     if (!result) {
       logprintf(0, "Process Error: memory region thread failed to push "
                 "pages into queue, bailing\n");
@@ -3271,5 +3334,5 @@ int MemoryRegionThread::Work() {
   status_ = result;
   logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d "
             "pages checked\n", thread_num_, status_, pages_copied_);
-  return 1;
+  return result;
 }
diff --git a/src/worker.h b/src/worker.h
index b85f926..7aae5f2 100644
--- a/src/worker.h
+++ b/src/worker.h
@@ -26,7 +26,7 @@
 #include <sys/time.h>
 #include <sys/types.h>
 
-#include <linux/aio_abi.h>
+#include <libaio.h>
 
 #include <queue>
 #include <set>
@@ -207,11 +207,11 @@ class WorkerThread {
   virtual ~WorkerThread();
 
   // Initialize values and thread ID number.
-  void InitThread(int thread_num_init,
-                  class Sat *sat_init,
-                  class OsLayer *os_init,
-                  class PatternList *patternlist_init,
-                  WorkerStatus *worker_status);
+  virtual void InitThread(int thread_num_init,
+                          class Sat *sat_init,
+                          class OsLayer *os_init,
+                          class PatternList *patternlist_init,
+                          WorkerStatus *worker_status);
 
   // This function is DEPRECATED, it does nothing.
   void SetPriority(Priority priority) { priority_ = priority; }
@@ -222,13 +222,13 @@ class WorkerThread {
   bool InitPriority();
 
   // Wait for the thread to complete its cleanup.
-  virtual int JoinThread();
+  virtual bool JoinThread();
   // Kill worker thread with SIGINT.
-  virtual int KillThread();
+  virtual bool KillThread();
 
   // This is the task function that the thread executes.
   // This is implemented per subclass.
-  virtual int Work();
+  virtual bool Work();
 
   // Starts per-WorkerThread timer.
   void StartThreadTimer() {gettimeofday(&start_time_, NULL);}
@@ -247,13 +247,13 @@ class WorkerThread {
   }
 
   // Acccess member variables.
-  int GetStatus() {return status_;}
+  bool GetStatus() {return status_;}
   int64 GetErrorCount() {return errorcount_;}
   int64 GetPageCount() {return pages_copied_;}
   int64 GetRunDurationUSec() {return runduration_usec_;}
 
   // Returns bandwidth defined as pages_copied / thread_run_durations.
-  float GetCopiedData();
+  virtual float GetCopiedData();
   // Calculate worker thread specific copied data.
   virtual float GetMemoryCopiedData() {return 0;}
   virtual float GetDeviceCopiedData() {return 0;}
@@ -265,18 +265,31 @@ class WorkerThread {
     {return GetDeviceCopiedData() / (
         runduration_usec_ * 1.0 / 1000000);}
 
-  void set_cpu_mask(int32 mask) {cpu_mask_ = mask;}
+  void set_cpu_mask(cpu_set_t *mask) {
+    memcpy(&cpu_mask_, mask, sizeof(*mask));
+  }
+
+  void set_cpu_mask_to_cpu(int cpu_num) {
+    cpuset_set_ab(&cpu_mask_, cpu_num, cpu_num + 1);
+  }
+
   void set_tag(int32 tag) {tag_ = tag;}
 
   // Returns CPU mask, where each bit represents a logical cpu.
-  uint32 AvailableCpus();
+  bool AvailableCpus(cpu_set_t *cpuset);
   // Returns CPU mask of CPUs this thread is bound to,
-  uint32 CurrentCpus();
+  bool CurrentCpus(cpu_set_t *cpuset);
+  // Returns current CPU mask as a string; CurrentCpus() failure is ignored.
+  string CurrentCpusFormat() {
+    cpu_set_t current_cpus;
+    CurrentCpus(&current_cpus);
+    return cpuset_format(&current_cpus);
+  }
 
   int ThreadID() {return thread_num_;}
 
   // Bind worker thread to specified CPU(s)
-  bool BindToCpus(uint32 thread_mask);
+  bool BindToCpus(const cpu_set_t *cpuset);
 
  protected:
   // This function dictates whether the main work loop
@@ -326,17 +339,26 @@ class WorkerThread {
                                 unsigned int size_in_bytes,
                                 AdlerChecksum *checksum,
                                 struct page_entry *pe);
+  // SSE copy with address tagging.
+  virtual bool AdlerAddrMemcpyWarm(uint64 *dstmem64,
+                                   uint64 *srcmem64,
+                                   unsigned int size_in_bytes,
+                                   AdlerChecksum *checksum,
+                                   struct page_entry *pe);
   // Crc data with address tagging.
   virtual bool AdlerAddrCrcC(uint64 *srcmem64,
                              unsigned int size_in_bytes,
                              AdlerChecksum *checksum,
                              struct page_entry *pe);
+  // Setup tagging on an existing page.
+  virtual bool TagAddrC(uint64 *memwords,
+                        unsigned int size_in_bytes);
   // Report a mistagged cacheline.
-  bool ReportTagError(uint64 *mem64,
+  virtual bool ReportTagError(uint64 *mem64,
                       uint64 actual,
                       uint64 tag);
   // Print out the error record of the tag mismatch.
-  void ProcessTagError(struct ErrorRecord *error,
+  virtual void ProcessTagError(struct ErrorRecord *error,
                        int priority,
                        const char *message);
 
@@ -346,11 +368,11 @@ class WorkerThread {
  protected:
   // General state variables that all subclasses need.
   int thread_num_;                  // Thread ID.
-  volatile int status_;             // Error status.
+  volatile bool status_;            // Error status.
   volatile int64 pages_copied_;     // Recorded for memory bandwidth calc.
   volatile int64 errorcount_;       // Miscompares seen by this thread.
 
-  volatile uint32 cpu_mask_;        // Cores this thread is allowed to run on.
+  cpu_set_t cpu_mask_;              // Cores this thread is allowed to run on.
   volatile uint32 tag_;             // Tag hint for memory this thread can use.
 
   bool tag_mode_;                   // Tag cachelines with vaddr.
@@ -383,7 +405,7 @@ class FileThread : public WorkerThread {
   FileThread();
   // Set filename to use for file IO.
   virtual void SetFile(const char *filename_init);
-  virtual int Work();
+  virtual bool Work();
 
   // Calculate worker thread specific bandwidth.
   virtual float GetDeviceCopiedData()
@@ -466,7 +488,7 @@ class NetworkThread : public WorkerThread {
   NetworkThread();
   // Set hostname to use for net IO.
   virtual void SetIP(const char *ipaddr_init);
-  virtual int Work();
+  virtual bool Work();
 
   // Calculate worker thread specific bandwidth.
   virtual float GetDeviceCopiedData()
@@ -493,7 +515,7 @@ class NetworkSlaveThread : public NetworkThread {
   NetworkSlaveThread();
   // Set socket for IO.
   virtual void SetSock(int sock);
-  virtual int Work();
+  virtual bool Work();
 
  protected:
   virtual bool IsNetworkStopSet();
@@ -506,7 +528,7 @@ class NetworkSlaveThread : public NetworkThread {
 class NetworkListenThread : public NetworkThread {
  public:
   NetworkListenThread();
-  virtual int Work();
+  virtual bool Work();
 
  private:
   virtual bool Listen();
@@ -530,7 +552,7 @@ class NetworkListenThread : public NetworkThread {
 class CopyThread : public WorkerThread {
  public:
   CopyThread() {}
-  virtual int Work();
+  virtual bool Work();
   // Calculate worker thread specific bandwidth.
   virtual float GetMemoryCopiedData()
     {return GetCopiedData()*2;}
@@ -543,7 +565,7 @@ class CopyThread : public WorkerThread {
 class InvertThread : public WorkerThread {
  public:
   InvertThread() {}
-  virtual int Work();
+  virtual bool Work();
   // Calculate worker thread specific bandwidth.
   virtual float GetMemoryCopiedData()
     {return GetCopiedData()*4;}
@@ -560,7 +582,7 @@ class FillThread : public WorkerThread {
   FillThread();
   // Set how many pages this thread should fill before exiting.
   virtual void SetFillPages(int64 num_pages_to_fill_init);
-  virtual int Work();
+  virtual bool Work();
 
  private:
   // Fill a page with the data pattern in pe->pattern.
@@ -575,7 +597,7 @@ class FillThread : public WorkerThread {
 class CheckThread : public WorkerThread {
  public:
   CheckThread() {}
-  virtual int Work();
+  virtual bool Work();
   // Calculate worker thread specific bandwidth.
   virtual float GetMemoryCopiedData()
     {return GetCopiedData();}
@@ -590,7 +612,7 @@ class CheckThread : public WorkerThread {
 class ErrorPollThread : public WorkerThread {
  public:
   ErrorPollThread() {}
-  virtual int Work();
+  virtual bool Work();
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ErrorPollThread);
@@ -600,7 +622,7 @@ class ErrorPollThread : public WorkerThread {
 class CpuStressThread : public WorkerThread {
  public:
   CpuStressThread() {}
-  virtual int Work();
+  virtual bool Work();
 
  private:
   DISALLOW_COPY_AND_ASSIGN(CpuStressThread);
@@ -614,7 +636,7 @@ class CpuCacheCoherencyThread : public WorkerThread {
                           int cc_cacheline_count_,
                           int cc_thread_num_,
                           int cc_inc_count_);
-  virtual int Work();
+  virtual bool Work();
 
  protected:
   cc_cacheline_data *cc_cacheline_data_;  // Datstructure for each cacheline.
@@ -651,7 +673,7 @@ class DiskThread : public WorkerThread {
                              int64 write_threshold,
                              int non_destructive);
 
-  virtual int Work();
+  virtual bool Work();
 
   virtual float GetMemoryCopiedData() {return 0;}
 
@@ -727,7 +749,7 @@ class DiskThread : public WorkerThread {
                                                 // not verified.
   void *block_buffer_;        // Pointer to aligned block buffer.
 
-  aio_context_t aio_ctx_;     // Asynchronous I/O context for Linux native AIO.
+  io_context_t aio_ctx_;     // Asynchronous I/O context for Linux native AIO.
 
   DiskBlockTable *block_table_;  // Disk Block Table, shared by all disk
                                  // threads that read / write at the same
@@ -751,7 +773,7 @@ class MemoryRegionThread : public WorkerThread {
  public:
   MemoryRegionThread();
   ~MemoryRegionThread();
-  virtual int Work();
+  virtual bool Work();
   void ProcessError(struct ErrorRecord *error, int priority,
                     const char *message);
   bool SetRegion(void *region, int64 size);
-- 
2.30.2