New frequency test, fixed error accounting, added logging timestamps, and miscellaneo...

author ewout <ewout@google.com>

Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)

committer ewout <ewout@google.com>

Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)
author ewout <ewout@google.com>
Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)
committer ewout <ewout@google.com>
Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)
diff --git a/configure.ac b/configure.ac

index ca10966c9dac923df38efac9825cb11ae920beb5..6f09eb985383378f99aed08a9e8f89c568f2e835 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -5,10 +5,10 @@ AC_ARG_WITH(static, [  --with-static            enable static linking])
  
  if test "$with_static" == "yes"
  then
-       AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
-       LIBS="$LIBS -static"
+  AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
+  LIBS="$LIBS -static"
  else
-       AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
+  AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
  fi
  
  AC_CANONICAL_HOST
diff --git a/src/Makefile.am b/src/Makefile.am

index 2179b427c3a1526bd3278bba545f4a32af5ae8d4..16f539dfbdc17a7cd9f067713b91268a5b18d0f2 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -28,6 +28,7 @@ HFILES += error_diag.h
  HFILES += disk_blocks.h
  HFILES += adler32memcpy.h
  HFILES += logger.h
+HFILES += clock.h
  
  stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES)
  findmask_SOURCES = findmask.c findmask.inc
diff --git a/src/clock.h b/src/clock.h

new file mode 100644 (file)

index 0000000..4204188
--- /dev/null
+++ b/src/clock.h
@@ -0,0 +1,29 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+// Author: cferris
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//      http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STRESSAPPTEST_CLOCK_H_  // NOLINT
+#define STRESSAPPTEST_CLOCK_H_
+
+#include <time.h>
+
+// This class implements a clock that can be overridden for unit tests.
+class Clock {
+ public:
+  virtual ~Clock() {}
+
+  virtual time_t Now() { return time(NULL); }
+};
+
+#endif  // STRESSAPPTEST_CLOCK_H_ NOLINT
diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc

index c7860b0cf4f58d8715fb018fb6311e1148dbe264..60018f9cfb707d93d01249ff1d7d58574d376387 100644 (file)
--- a/src/disk_blocks.cc
+++ b/src/disk_blocks.cc
@@ -14,38 +14,51 @@
  
  // Thread-safe container of disk blocks
  
-#include <utility>
-
  // This file must work with autoconf on its public version,
  // so these includes are correct.
  #include "disk_blocks.h"
  
-DiskBlockTable::DiskBlockTable() {
-  nelems_ = 0;
+#include <utility>
+
+// BlockData
+BlockData::BlockData() : address_(0), size_(0),
+                         references_(0), initialized_(false),
+                         pattern_(NULL) {
+  pthread_mutex_init(&data_mutex_, NULL);
+}
+
+BlockData::~BlockData() {
+  pthread_mutex_destroy(&data_mutex_);
+}
+
+void BlockData::set_initialized() {
+  pthread_mutex_lock(&data_mutex_);
+  initialized_ = true;
+  pthread_mutex_unlock(&data_mutex_);
+}
+
+bool BlockData::initialized() const {
+  pthread_mutex_lock(&data_mutex_);
+  bool initialized = initialized_;
+  pthread_mutex_unlock(&data_mutex_);
+  return initialized;
+}
+
+// DiskBlockTable
+DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0),
+                                   device_name_(""), device_sectors_(0),
+                                   segment_size_(0), size_(0) {
    pthread_mutex_init(&data_mutex_, NULL);
    pthread_mutex_init(&parameter_mutex_, NULL);
    pthread_cond_init(&data_condition_, NULL);
  }
  
  DiskBlockTable::~DiskBlockTable() {
-  CleanTable();
    pthread_mutex_destroy(&data_mutex_);
    pthread_mutex_destroy(&parameter_mutex_);
    pthread_cond_destroy(&data_condition_);
  }
  
-void DiskBlockTable::CleanTable() {
-  pthread_mutex_lock(&data_mutex_);
-  for (map<int64, StorageData*>::iterator it =
-           addr_to_block_.begin(); it != addr_to_block_.end(); ++it) {
-    delete it->second;
-  }
-  addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end());
-  nelems_ = 0;
-  pthread_cond_broadcast(&data_condition_);
-  pthread_mutex_unlock(&data_mutex_);
-}
-
  // 64-bit non-negative random number generator.  Stolen from
  // depot/google3/base/tracecontext_unittest.cc.
  int64 DiskBlockTable::Random64() {
@@ -58,28 +71,27 @@ int64 DiskBlockTable::Random64() {
      return -x;
  }
  
-int64 DiskBlockTable::NumElems() {
-  unsigned int nelems;
+uint64 DiskBlockTable::Size() {
    pthread_mutex_lock(&data_mutex_);
-  nelems = nelems_;
+  uint64 size = size_;
    pthread_mutex_unlock(&data_mutex_);
-  return nelems;
+  return size;
  }
  
  void DiskBlockTable::InsertOnStructure(BlockData *block) {
-  int64 address = block->GetAddress();
+  int64 address = block->address();
    StorageData *sd = new StorageData();
    sd->block = block;
-  sd->pos = nelems_;
+  sd->pos = size_;
    // Creating new block ...
    pthread_mutex_lock(&data_mutex_);
-  if (pos_to_addr_.size() <= nelems_) {
+  if (pos_to_addr_.size() <= size_) {
      pos_to_addr_.insert(pos_to_addr_.end(), address);
    } else {
-    pos_to_addr_[nelems_] = address;
+    pos_to_addr_[size_] = address;
    }
-  addr_to_block_.insert(std::make_pair(address, sd));
-  nelems_++;
+  addr_to_block_[address] = sd;
+  size_++;
    pthread_cond_broadcast(&data_condition_);
    pthread_mutex_unlock(&data_mutex_);
  }
@@ -87,26 +99,28 @@ void DiskBlockTable::InsertOnStructure(BlockData *block) {
  int DiskBlockTable::RemoveBlock(BlockData *block) {
    // For write threads, check the reference counter and remove
    // it from the structure.
-  int64 address = block->GetAddress();
+  int64 address = block->address();
    AddrToBlockMap::iterator it = addr_to_block_.find(address);
    int ret = 1;
    if (it != addr_to_block_.end()) {
      int curr_pos = it->second->pos;
-    int last_pos = nelems_ - 1;
+    int last_pos = size_ - 1;
      AddrToBlockMap::iterator last_it = addr_to_block_.find(
          pos_to_addr_[last_pos]);
-    sat_assert(nelems_ > 0);
+    sat_assert(size_ > 0);
      sat_assert(last_it != addr_to_block_.end());
-    // Everything is fine, updating ...
+    // Everything is fine, removing block from table.
      pthread_mutex_lock(&data_mutex_);
      pos_to_addr_[curr_pos] = pos_to_addr_[last_pos];
      last_it->second->pos = curr_pos;
      delete it->second;
      addr_to_block_.erase(it);
-    nelems_--;
+    size_--;
      block->DecreaseReferenceCounter();
      if (block->GetReferenceCounter() == 0)
        delete block;
+    else if (block->GetReferenceCounter() < 0)
+      ret = 0;
      pthread_cond_broadcast(&data_condition_);
      pthread_mutex_unlock(&data_mutex_);
    } else {
@@ -116,18 +130,16 @@ int DiskBlockTable::RemoveBlock(BlockData *block) {
  }
  
  int DiskBlockTable::ReleaseBlock(BlockData *block) {
-  // If is a random thread, just check the reference counter.
+  // If caller is a random thread, just check the reference counter.
    int ret = 1;
    pthread_mutex_lock(&data_mutex_);
    int references = block->GetReferenceCounter();
-  if (references > 0) {
-    if (references == 1)
-      delete block;
-    else
-      block->DecreaseReferenceCounter();
-  } else {
+  if (references == 1)
+    delete block;
+  else if (references > 0)
+    block->DecreaseReferenceCounter();
+  else
      ret = 0;
-  }
    pthread_mutex_unlock(&data_mutex_);
    return ret;
  }
@@ -135,13 +147,13 @@ int DiskBlockTable::ReleaseBlock(BlockData *block) {
  BlockData *DiskBlockTable::GetRandomBlock() {
    struct timespec ts;
    struct timeval tp;
-  int result = 0;
    gettimeofday(&tp, NULL);
    ts.tv_sec  = tp.tv_sec;
    ts.tv_nsec = tp.tv_usec * 1000;
    ts.tv_sec += 2;  // Wait for 2 seconds.
+  int result = 0;
    pthread_mutex_lock(&data_mutex_);
-  while (!nelems_ && result != ETIMEDOUT) {
+  while (!size_ && result != ETIMEDOUT) {
      result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts);
    }
    if (result == ETIMEDOUT) {
@@ -149,13 +161,13 @@ BlockData *DiskBlockTable::GetRandomBlock() {
      return NULL;
    } else {
      int64 random_number = Random64();
-    int64 random_pos = random_number % nelems_;
+    int64 random_pos = random_number % size_;
      int64 address = pos_to_addr_[random_pos];
      AddrToBlockMap::const_iterator it = addr_to_block_.find(address);
      sat_assert(it != addr_to_block_.end());
      BlockData *b = it->second->block;
      // A block is returned only if its content is written on disk.
-    if (b->BlockIsInitialized()) {
+    if (b->initialized()) {
        b->IncreaseReferenceCounter();
      } else {
        b = NULL;
@@ -165,45 +177,38 @@ BlockData *DiskBlockTable::GetRandomBlock() {
    }
  }
  
-void DiskBlockTable::SetParameters(
-    int sector_size, int write_block_size, int64 device_sectors,
-    int64 segment_size, string device_name) {
+void DiskBlockTable::SetParameters(int sector_size,
+                                   int write_block_size,
+                                   int64 device_sectors,
+                                   int64 segment_size,
+                                   const string& device_name) {
+  sat_assert(size_ == 0);
    pthread_mutex_lock(&parameter_mutex_);
    sector_size_ = sector_size;
    write_block_size_ = write_block_size;
    device_sectors_ = device_sectors;
    segment_size_ = segment_size;
    device_name_ = device_name;
-  CleanTable();
    pthread_mutex_unlock(&parameter_mutex_);
  }
  
  BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
    int64 sector = 0;
    BlockData *block = new BlockData();
-
    bool good_sequence = false;
-  int num_sectors;
-
    if (block == NULL) {
      logprintf(0, "Process Error: Unable to allocate memory "
                "for sector data for disk %s.\n", device_name_.c_str());
      return NULL;
    }
-
    pthread_mutex_lock(&parameter_mutex_);
-
    sat_assert(device_sectors_ != 0);
-
    // Align the first sector with the beginning of a write block
-  num_sectors = write_block_size_ / sector_size_;
-
+  int num_sectors = write_block_size_ / sector_size_;
    for (int i = 0; i < kBlockRetry && !good_sequence; i++) {
      good_sequence = true;
-
      // Use the entire disk or a small segment of the disk to allocate the first
      // sector in the block from.
-
      if (segment_size_ == -1) {
        sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % (
            device_sectors_ / num_sectors);
@@ -213,7 +218,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
            segment_size_ / num_sectors);
        sector *= num_sectors;
        sector += segment * segment_size_;
-
        // Make sure the block is within the segment.
        if (sector + num_sectors > (segment + 1) * segment_size_) {
          good_sequence = false;
@@ -229,7 +233,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
      // now aligned to the write_block_size, it is not necessary
      // to check each sector, just the first block (a sector
      // overlap will never occur).
-
      pthread_mutex_lock(&data_mutex_);
      if (addr_to_block_.find(sector) != addr_to_block_.end()) {
        good_sequence = false;
@@ -238,7 +241,8 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
    }
  
    if (good_sequence) {
-    block->SetParameters(sector, write_block_size_);
+    block->set_address(sector);
+    block->set_size(write_block_size_);
      block->IncreaseReferenceCounter();
      InsertOnStructure(block);
    } else {
@@ -248,66 +252,5 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
      block = NULL;
    }
    pthread_mutex_unlock(&parameter_mutex_);
-
    return block;
  }
-
-// BlockData
-
-BlockData::BlockData() {
-  addr_ = 0;
-  size_ = 0;
-  references_ = 0;
-  initialized_ = false;
-  pthread_mutex_init(&data_mutex_, NULL);
-}
-
-BlockData::~BlockData() {
-  pthread_mutex_destroy(&data_mutex_);
-}
-
-void BlockData::SetParameters(int64 address, int64 size) {
-  addr_ = address;
-  size_ = size;
-}
-
-void BlockData::IncreaseReferenceCounter() {
-  references_++;
-}
-
-void BlockData::DecreaseReferenceCounter() {
-  references_--;
-}
-
-int BlockData::GetReferenceCounter() {
-  return references_;
-}
-
-void BlockData::SetBlockAsInitialized() {
-  pthread_mutex_lock(&data_mutex_);
-  initialized_ = true;
-  pthread_mutex_unlock(&data_mutex_);
-}
-
-bool BlockData::BlockIsInitialized() {
-  pthread_mutex_lock(&data_mutex_);
-  bool initialized = initialized_;
-  pthread_mutex_unlock(&data_mutex_);
-  return initialized;
-}
-
-int64 BlockData::GetAddress() {
-  return addr_;
-}
-
-int64 BlockData::GetSize() {
-  return size_;
-}
-
-Pattern *BlockData::GetPattern() {
-  return pattern_;
-}
-
-void BlockData::SetPattern(Pattern *p) {
-  pattern_ = p;
-}
diff --git a/src/disk_blocks.h b/src/disk_blocks.h

index cb634c9135833d3ee780c1746ecac0e82e7d7b55..638ee9f34242592df73cf9aac7fc2043c373c877 100644 (file)
--- a/src/disk_blocks.h
+++ b/src/disk_blocks.h
@@ -25,87 +25,146 @@
  #include <map>
  #include <vector>
  #include <string>
-// This file must work with autoconf on its public version,
-// so these includes are correct.
-#include "pattern.h"
+
+#include "sattypes.h"
+
+class Pattern;
  
  // Data about a block written to disk so that it can be verified later.
+// Thread-unsafe: non-const methods must be called with a lock held,
+// except for the initialized accessor/mutator, which are thread-safe
+// (and are, in fact, the only methods meant to be called by anything
+// other than the thread-safe DiskBlockTable).
  class BlockData {
   public:
    BlockData();
    ~BlockData();
-  void SetParameters(int64 address, int64 size);
-  void IncreaseReferenceCounter();
-  void DecreaseReferenceCounter();
-  int GetReferenceCounter();
-  void SetBlockAsInitialized();
-  bool BlockIsInitialized();
-  int64 GetAddress();
-  int64 GetSize();
-  void SetPattern(Pattern *p);
-  Pattern *GetPattern();
- protected:
-  int64 addr_;         // address of first sector in block
-  int64 size_;         // size of block
-  int references_;      // reference counter
-  bool initialized_;     // flag indicating the block was written on disk
+
+  // These are reference counters used to control how many
+  // threads currently have a copy of this particular block.
+  void IncreaseReferenceCounter() { references_++; }
+  void DecreaseReferenceCounter() { references_--; }
+  int GetReferenceCounter() const { return references_; }
+
+  // Controls whether the block was written on disk or not.
+  // Once written, a block cannot be "un-written" without destroying
+  // this object.
+  void set_initialized();
+  bool initialized() const;
+
+  // Accessor methods for some data related to blocks.
+  void set_address(uint64 address) { address_ = address; }
+  uint64 address() const { return address_; }
+  void set_size(uint64 size) { size_ = size; }
+  uint64 size() const { return size_; }
+  void set_pattern(Pattern *p) { pattern_ = p; }
+  Pattern *pattern() { return pattern_; }
+ private:
+  uint64 address_;  // Address of first sector in block
+  uint64 size_;  // Size of block
+  int references_;  // Reference counter
+  bool initialized_;  // Flag indicating the block was written on disk
    Pattern *pattern_;
-  pthread_mutex_t data_mutex_;
+  mutable pthread_mutex_t data_mutex_;
    DISALLOW_COPY_AND_ASSIGN(BlockData);
  };
  
-// Disk Block table - store data from blocks to be write / read by
-// a DiskThread
+// A thread-safe table used to store block data and control access
+// to these blocks, letting several threads read and write blocks on
+// disk.
  class DiskBlockTable {
   public:
    DiskBlockTable();
    virtual ~DiskBlockTable();
  
-  // Get Number of elements stored on table
-  int64 NumElems();
-  // Clean all table data
-  void CleanTable();
-  // Get a random block from the list. Only returns if a element
-  // is available (consider that other thread must have added them.
-  BlockData *GetRandomBlock();
-  // Set all initial parameters. Assumes all existent data is
+  // Returns number of elements stored on table.
+  uint64 Size();
+
+  // Sets all initial parameters. Assumes all existent data is
    // invalid and, therefore, must be removed.
    void SetParameters(int sector_size, int write_block_size,
                       int64 device_sectors,
                       int64 segment_size,
-                     string device_name);
-  // Return a new block in a unused address.
+                     const string& device_name);
+
+  // During the regular execution, there will be 2 types of threads:
+  // - Write thread:  gets a large number of blocks using GetUnusedBlock,
+  //                  writes them on disk (if in destructive mode),
+  //                  reads block content ONCE from disk and then removes
+  //                  the block from queue with RemoveBlock. After a removal a
+  //                  block is not available for read threads, but it is
+  //                  only removed from memory if there is no reference for
+  //                  this block. Note that a write thread also counts as
+  //                  a reference.
+  // - Read threads:  get one block at a time (if available) with
+  //                  GetRandomBlock, reads its content from disk,
+  //                  checking whether it is correct or not, and releases
+  //                  (Using ReleaseBlock) the block to be erased by the
+  //                  write threads. Since several read threads are allowed
+  //                  to read the same block, a reference counter is used to
+  //                  control when the block can be REALLY erased from
+  //                  memory, and all memory management is made by a
+  //                  DiskBlockTable instance.
+
+  // Returns a new block in an unused address. Does not
+  // grant ownership of the pointer to the caller
+  // (use RemoveBlock to delete the block from memory instead).
    BlockData *GetUnusedBlock(int64 segment);
-  // Remove block from structure (called by write threads)
+
+  // Removes block from structure (called by write threads). Returns
+  // 1 if successful, 0 otherwise.
    int RemoveBlock(BlockData *block);
-  // Release block to be erased (called by random threads)
-  int ReleaseBlock(BlockData *block);
  
- protected:
+  // Gets a random block from the list. Only returns if an element
+  // is available (a write thread has got this block, written it on disk,
+  // and set this block as initialized). Does not grant ownership of the
+  // pointer to the caller (use RemoveBlock to delete the block from
+  // memory instead).
+  BlockData *GetRandomBlock();
  
-  void InsertOnStructure(BlockData *block);
-  //  Generate a random 64-bit integer (virtual so it could be
-  //  override by the tests)
-  virtual int64 Random64();
+  // Releases block to be erased (called by random threads). Returns
+  // 1 if successful, 0 otherwise.
+  int ReleaseBlock(BlockData *block);
  
+ protected:
    struct StorageData {
      BlockData *block;
      int pos;
    };
-
-  static const int kBlockRetry = 100;       // Number of retries to allocate
-                                            // sectors.
-
    typedef map<int64, StorageData*> AddrToBlockMap;
    typedef vector<int64> PosToAddrVector;
+
+  // Inserts block in structure, used in tests and by other methods.
+  void InsertOnStructure(BlockData *block);
+
+  // Generates a random 64-bit integer.
+  // Virtual method so it can be overridden by the tests.
+  virtual int64 Random64();
+
+  // Accessor methods for testing.
+  const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; }
+  const AddrToBlockMap& addr_to_block() const { return addr_to_block_; }
+
+  int sector_size() const { return sector_size_; }
+  int write_block_size() const { return write_block_size_; }
+  const string& device_name() const { return device_name_; }
+  int64 device_sectors() const { return device_sectors_; }
+  int64 segment_size() const { return segment_size_; }
+
+ private:
+  // Number of retries to allocate sectors.
+  static const int kBlockRetry = 100;
+  // Actual tables.
    PosToAddrVector pos_to_addr_;
    AddrToBlockMap addr_to_block_;
-  uint64 nelems_;
-  int sector_size_;          // Sector size, in bytes
-  int write_block_size_;     // Block size, in bytes
-  string device_name_;       // Device name
-  int64 device_sectors_;     // Number of sectors in device
-  int64 segment_size_;       // Segment size, in bytes
+
+  // Configuration parameters for block selection
+  int sector_size_;  // Sector size, in bytes
+  int write_block_size_;  // Block size, in bytes
+  string device_name_;  // Device name
+  int64 device_sectors_;  // Number of sectors in device
+  int64 segment_size_;  // Segment size in bytes
+  uint64 size_;  // Number of elements on table
    pthread_mutex_t data_mutex_;
    pthread_cond_t data_condition_;
    pthread_mutex_t parameter_mutex_;
diff --git a/src/findmask.c b/src/findmask.c

index d8ec30040ab740ff7fb18396b2644275bf5061af..1b10988d85c6307f13f2e8c8c2ec816f794736e5 100644 (file)
--- a/src/findmask.c
+++ b/src/findmask.c
@@ -38,6 +38,7 @@
   * current progress.
   */
  
+#include <inttypes.h>
  #include <pthread.h>
  #include <signal.h>
  #include <stdint.h>
@@ -106,7 +107,7 @@ void* thread_func(void* arg) {
  
      if (a < NOISE) b = a;
      if (b < NOISE) {
-      printf("Found mask with just %d deviations: 0x%llx\n", b, mask);
+      printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask);
        fflush(stdout);
      }
  
@@ -118,7 +119,8 @@ void* thread_func(void* arg) {
  }
  
  void signal_handler(int signum) {
-  printf("Received signal... currently evaluating mask 0x%llx!\n", lastmask);
+  printf("Received signal... currently evaluating mask 0x%" PRIx64 "!\n",
+         lastmask);
    fflush(stdout);
  }
  
diff --git a/src/logger.cc b/src/logger.cc

index e4ecb03f238d20d83ac9f592c39b5143e93f0fae..f13e00352bed79ee8db8b29a4ff7fd65d799c5d2 100644 (file)
--- a/src/logger.cc
+++ b/src/logger.cc
@@ -17,6 +17,7 @@
  #include <pthread.h>
  #include <stdarg.h>
  #include <stdio.h>
+#include <time.h>
  #include <unistd.h>
  
  #include <string>
@@ -37,10 +38,20 @@ void Logger::VLogF(int priority, const char *format, va_list args) {
      return;
    }
    char buffer[4096];
-  int length = vsnprintf(buffer, sizeof buffer, format, args);
-  if (static_cast<size_t>(length) >= sizeof buffer) {
-    length = sizeof buffer;
-    buffer[sizeof buffer - 1] = '\n';
+  size_t length = 0;
+  if (log_timestamps_) {
+    time_t raw_time;
+    time(&raw_time);
+    struct tm time_struct;
+    localtime_r(&raw_time, &time_struct);
+    length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ",
+                      &time_struct);
+    LOGGER_ASSERT(length);  // Catch if the buffer is set too small.
+  }
+  length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args);
+  if (length >= sizeof(buffer)) {
+    length = sizeof(buffer);
+    buffer[sizeof(buffer) - 1] = '\n';
    }
    QueueLogLine(new string(buffer, length));
  }
@@ -52,19 +63,30 @@ void Logger::StartThread() {
  }
  
  void Logger::StopThread() {
-  LOGGER_ASSERT(thread_running_);
+  // Allow this to be called before the thread has started.
+  if (!thread_running_) {
+    return;
+  }
    thread_running_ = false;
-  LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_));
+  int retval = pthread_mutex_lock(&queued_lines_mutex_);
+  LOGGER_ASSERT(0 == retval);
    bool need_cond_signal = queued_lines_.empty();
    queued_lines_.push_back(NULL);
-  LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
+  retval = pthread_mutex_unlock(&queued_lines_mutex_);
+  LOGGER_ASSERT(0 == retval);
    if (need_cond_signal) {
-    LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_));
+    retval = pthread_cond_signal(&queued_lines_cond_);
+    LOGGER_ASSERT(0 == retval);
    }
-  LOGGER_ASSERT(0 == pthread_join(thread_, NULL));
+  retval = pthread_join(thread_, NULL);
+  LOGGER_ASSERT(0 == retval);
  }
  
-Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) {
+Logger::Logger()
+    : verbosity_(20),
+      log_fd_(-1),
+      thread_running_(false),
+      log_timestamps_(true) {
    LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL));
    LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL));
    LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL));
@@ -94,19 +116,15 @@ void Logger::QueueLogLine(string *line) {
    LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
  }
  
-namespace {
-void WriteToFile(const string& line, int fd) {
-  LOGGER_ASSERT(write(fd, line.data(), line.size()) ==
-                static_cast<ssize_t>(line.size()));
-}
-}
-
  void Logger::WriteAndDeleteLogLine(string *line) {
    LOGGER_ASSERT(line != NULL);
+  ssize_t bytes_written;
    if (log_fd_ >= 0) {
-    WriteToFile(*line, log_fd_);
+    bytes_written = write(log_fd_, line->data(), line->size());
+    LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
    }
-  WriteToFile(*line, 1);
+  bytes_written = write(STDOUT_FILENO, line->data(), line->size());
+  LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
    delete line;
  }
  
diff --git a/src/logger.h b/src/logger.h

index 1d70107a1309e250a46017fac71eb233d5992f20..21b3c6b5e01a3dad9294124ea10f0fe58e5d9289 100644 (file)
--- a/src/logger.h
+++ b/src/logger.h
@@ -62,7 +62,7 @@ class Logger {
  
    // Lines with a priority numerically greater than this will not be logged.
    // May not be called while multiple threads are running.
-  void SetVerbosity(int verbosity) {
+  virtual void SetVerbosity(int verbosity) {
      verbosity_ = verbosity;
    }
  
@@ -72,17 +72,22 @@ class Logger {
    // Args:
    //   log_fd: The file descriptor to write to.  Will not be closed by this
    //           object.
-  void SetLogFd(int log_fd) {
+  virtual void SetLogFd(int log_fd) {
      LOGGER_ASSERT(log_fd >= 0);
      log_fd_ = log_fd;
    }
  
    // Set output to be written to stdout only.  This is the default mode.  May
    // not be called while multiple threads are running.
-  void SetStdoutOnly() {
+  virtual void SetStdoutOnly() {
      log_fd_ = -1;
    }
  
+  // Enable or disable logging of timestamps.
+  void SetTimestampLogging(bool log_ts_enabled) {
+    log_timestamps_ = log_ts_enabled;
+  }
+
    // Logs a line, with a vprintf(3)-like interface.  This will block on writing
    // the line to stdout/disk iff the dedicated logging thread is not running.
    // This will block on adding the line to the queue if doing so would exceed
@@ -104,11 +109,12 @@ class Logger {
    // before this returns.  Waits for the thread to finish before returning.
    void StopThread();
  
- private:
+ protected:
    Logger();
  
-  ~Logger();
+  virtual ~Logger();
  
+ private:
    // Args:
    //   line: Must be non-NULL.  This function takes ownership of it.
    void QueueLogLine(string *line);
@@ -127,6 +133,7 @@ class Logger {
    int verbosity_;
    int log_fd_;
    bool thread_running_;
+  bool log_timestamps_;
    vector<string*> queued_lines_;
    // This doubles as a mutex for log_fd_ when the logging thread is not running.
    pthread_mutex_t queued_lines_mutex_;
diff --git a/src/os.cc b/src/os.cc

index 7cae23b026f4b58624d04600e65baf94115996ac..63583984984f050b92c6e8c3ff935d9a8be39a0c 100644 (file)
--- a/src/os.cc
+++ b/src/os.cc
@@ -48,6 +48,7 @@
  // so these includes are correct.
  #include "sattypes.h"
  #include "error_diag.h"
+#include "clock.h"
  
  // OsLayer initialization.
  OsLayer::OsLayer() {
@@ -55,10 +56,12 @@ OsLayer::OsLayer() {
    testmemsize_ = 0;
    totalmemsize_ = 0;
    min_hugepages_bytes_ = 0;
+  reserve_mb_ = 0;
    normal_mem_ = true;
    use_hugepages_ = false;
    use_posix_shm_ = false;
    dynamic_mapped_shmem_ = false;
+  mmapped_allocation_ = false;
    shmid_ = 0;
  
    time_initialized_ = 0;
@@ -79,17 +82,25 @@ OsLayer::OsLayer() {
    has_sse2_ = false;
  
    use_flush_page_cache_ = false;
+
+  clock_ = NULL;
  }
  
  // OsLayer cleanup.
  OsLayer::~OsLayer() {
    if (error_diagnoser_)
      delete error_diagnoser_;
+  if (clock_)
+    delete clock_;
  }
  
  // OsLayer initialization.
  bool OsLayer::Initialize() {
-  time_initialized_ = time(NULL);
+  if (!clock_) {
+    clock_ = new Clock();
+  }
+
+  time_initialized_ = clock_->Now();
    // Detect asm support.
    GetFeatures();
  
@@ -130,7 +141,7 @@ int OsLayer::AddressMode() {
  // Translates user virtual to physical address.
  uint64 OsLayer::VirtualToPhysical(void *vaddr) {
    uint64 frame, shift;
-  off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8;
+  off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
    int fd = open(kPagemapPath, O_RDONLY);
    // /proc/self/pagemap is available in kernel >= 2.6.25
    if (fd < 0)
@@ -169,22 +180,10 @@ list<string> OsLayer::FindFileDevices() {
  // Get HW core features from cpuid instruction.
  void OsLayer::GetFeatures() {
  #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
-  // CPUID features documented at:
-  // http://www.sandpile.org/ia32/cpuid.htm
-  int ax, bx, cx, dx;
-  __asm__ __volatile__ (
-# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__)
-      "xchg %%ebx, %%esi;"
-      "cpuid;"
-      "xchg %%esi, %%ebx;"
-      : "=S" (bx),
-# else
-      "cpuid;"
-      : "=b" (bx),
-# endif
-        "=a" (ax), "=c" (cx), "=d" (dx) : "a" (1));
-  has_clflush_ = (dx >> 19) & 1;
-  has_sse2_ = (dx >> 26) & 1;
+  unsigned int eax = 1, ebx, ecx, edx;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  has_clflush_ = (edx >> 19) & 1;
+  has_sse2_ = (edx >> 26) & 1;
  
    logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
              has_clflush_ ? "true" : "false",
@@ -244,8 +243,9 @@ bool OsLayer::FlushPageCache(void) {
  void OsLayer::Flush(void *vaddr) {
    // Use the generic flush. This function is just so we can override
    // this if we are so inclined.
-  if (has_clflush_)
-    FastFlush(vaddr);
+  if (has_clflush_) {
+    OsLayer::FastFlush(vaddr);
+  }
  }
  
  
@@ -266,15 +266,14 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
  // all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
  // blocks with bits distributed from each chip in that channel.
  int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
-  static const string unknown = "DIMM Unknown";
    if (!channels_) {
-    snprintf(buf, len, "%s", unknown.c_str());
-    return 0;
+    snprintf(buf, len, "DIMM Unknown");
+    return -1;
    }
  
    // Find channel by XORing address bits in channel_hash mask.
-  uint32 low = (uint32)(addr & channel_hash_);
-  uint32 high = (uint32)((addr & channel_hash_) >> 32);
+  uint32 low = static_cast<uint32>(addr & channel_hash_);
+  uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
    vector<string>& channel = (*channels_)[
        __builtin_parity(high) ^ __builtin_parity(low)];
  
@@ -342,9 +341,17 @@ string OsLayer::FindCoreMaskFormat(int32 region) {
  
  // Report an error in an easily parseable way.
  bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
-  time_t now = time(NULL);
+  time_t now = clock_->Now();
    int ttf = now - time_initialized_;
-  logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
+  if (strlen(symptom) && strlen(part)) {
+    logprintf(0, "Report Error: %s : %s : %d : %ds\n",
+              symptom, part, count, ttf);
+  } else {
+    // Log something so the error still shows up, but this won't break the
+    // parser.
+    logprintf(0, "Warning: Invalid Report Error: "
+              "%s : %s : %d : %ds\n", symptom, part, count, ttf);
+  }
    return true;
  }
  
@@ -408,12 +415,31 @@ int64 OsLayer::FindFreeMemSize() {
    //
    // TODO(nsanders): is there a more correct way to determine target
    // memory size?
-  if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
-    minsize = min_hugepages_bytes_;
-  } else if (physsize < 2048LL * kMegabyte) {
-    minsize = ((pages * 85) / 100) * pagesize;
+  if (hugepagesize > 0) {
+    if (min_hugepages_bytes_ > 0) {
+      minsize = min_hugepages_bytes_;
+    } else {
+      minsize = hugepagesize;
+    }
    } else {
-    minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+    if (physsize < 2048LL * kMegabyte) {
+      minsize = ((pages * 85) / 100) * pagesize;
+    } else {
+      minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+    }
+    // Make sure that at least reserve_mb_ is left for the system.
+    if (reserve_mb_ > 0) {
+      int64 totalsize = pages * pagesize;
+      int64 reserve_kb = reserve_mb_ * kMegabyte;
+      if (reserve_kb > totalsize) {
+        logprintf(0, "Procedural Error: %lld is bigger than the total memory "
+                  "available %lld\n", reserve_kb, totalsize);
+      } else if (reserve_kb > totalsize - minsize) {
+        logprintf(5, "Warning: Overriding memory to use: original %lld, "
+                  "current %lld\n", minsize, totalsize - reserve_kb);
+        minsize = totalsize - reserve_kb;
+      }
+    }
    }
  
    // Use hugepage sizing if available.
@@ -484,7 +510,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
                   "'sudo mount -o remount,size=100\% /dev/shm.'\n");
    } else if (hugepagesize >= length) {
      prefer_hugepages = true;
-    logprintf(3, "Log: Prefer using hugepace allocation.\n");
+    logprintf(3, "Log: Prefer using hugepage allocation.\n");
    } else {
      logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
    }
@@ -507,7 +533,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
          break;
        }
  
-      shmaddr = shmat(shmid, NULL, NULL);
+      shmaddr = shmat(shmid, NULL, 0);
        if (shmaddr == reinterpret_cast<void*>(-1)) {
          int err = errno;
          string errtxt = ErrorString(err);
@@ -564,7 +590,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
          // Do a full mapping here otherwise.
          shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
-                         shm_object, NULL);
+                         shm_object, 0);
          if (shmaddr == reinterpret_cast<void*>(-1)) {
            int err = errno;
            string errtxt = ErrorString(err);
@@ -589,18 +615,32 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
      } while (0);
      shm_unlink("/stressapptest");
    }
-#endif // HAVE_SYS_SHM_H
+#endif  // HAVE_SYS_SHM_H
  
    if (!use_hugepages_ && !use_posix_shm_) {
-    // Use memalign to ensure that blocks are aligned enough for disk direct IO.
-    buf = static_cast<char*>(memalign(4096, length));
-    if (buf) {
-      logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
-    } else {
-      logprintf(0, "Process Error: memalign returned 0\n");
-      if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
-        logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
-                     "bit process. Please setup shared memory.\n");
+    // If the page size is at least the 4096 bytes SAT expects, explicitly
+    // perform an mmap() allocation.
+    if (sysconf(_SC_PAGESIZE) >= 4096) {
+      void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+      if (map_buf != MAP_FAILED) {
+        buf = map_buf;
+        mmapped_allocation_ = true;
+        logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
+      }
+    }
+    if (!mmapped_allocation_) {
+      // Use memalign to ensure that blocks are aligned enough for disk direct
+      // IO.
+      buf = static_cast<char*>(memalign(4096, length));
+      if (buf) {
+        logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
+      } else {
+        logprintf(0, "Process Error: memalign returned 0\n");
+        if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
+          logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
+                       "bit process. Please setup shared memory.\n");
+        }
        }
      }
    }
@@ -628,6 +668,8 @@ void OsLayer::FreeTestMem() {
          munmap(testmem_, testmemsize_);
        }
        close(shmid_);
+    } else if (mmapped_allocation_) {
+      munmap(testmem_, testmemsize_);
      } else {
        free(testmem_);
      }
@@ -849,7 +891,9 @@ uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
  bool OsLayer::CpuStressWorkload() {
    double float_arr[100];
    double sum = 0;
+#ifdef HAVE_RAND_R
    unsigned int seed = 12345;
+#endif
  
    // Initialize array with random numbers.
    for (int i = 0; i < 100; i++) {
@@ -858,8 +902,9 @@ bool OsLayer::CpuStressWorkload() {
      if (rand_r(&seed) % 2)
        float_arr[i] *= -1.0;
  #else
-    float_arr[i] = rand();
-    if (rand() % 2)
+    srand(time(NULL));
+    float_arr[i] = rand();  // NOLINT
+    if (rand() % 2)         // NOLINT
        float_arr[i] *= -1.0;
  #endif
    }
@@ -877,82 +922,3 @@ bool OsLayer::CpuStressWorkload() {
      logprintf(12, "Log: I'm Feeling Lucky!\n");
    return true;
  }
-
-PCIDevices OsLayer::GetPCIDevices() {
-  PCIDevices device_list;
-  DIR *dir;
-  struct dirent *buf = new struct dirent();
-  struct dirent *entry;
-  dir = opendir(kSysfsPath);
-  if (!dir)
-    logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
-  while (readdir_r(dir, buf, &entry) == 0 && entry) {
-    PCIDevice *device;
-    unsigned int dev, func;
-    // ".", ".." or a special non-device perhaps.
-    if (entry->d_name[0] == '.')
-      continue;
-
-    device = new PCIDevice();
-    if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
-               &device->domain, &device->bus, &dev, &func) < 4) {
-      logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
-      free(device);
-      continue;
-    }
-    device->dev = dev;
-    device->func = func;
-    device->vendor_id = PCIGetValue(entry->d_name, "vendor");
-    device->device_id = PCIGetValue(entry->d_name, "device");
-    PCIGetResources(entry->d_name, device);
-    device_list.insert(device_list.end(), device);
-  }
-  closedir(dir);
-  delete buf;
-  return device_list;
-}
-
-int OsLayer::PCIGetValue(string name, string object) {
-  int fd, len;
-  char filename[256];
-  char buf[256];
-  snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
-           name.c_str(), object.c_str());
-  fd = open(filename, O_RDONLY);
-  if (fd < 0)
-    return 0;
-  len = read(fd, buf, 256);
-  close(fd);
-  buf[len] = '\0';
-  return strtol(buf, NULL, 0);  // NOLINT
-}
-
-int OsLayer::PCIGetResources(string name, PCIDevice *device) {
-  char filename[256];
-  char buf[256];
-  FILE *file;
-  int64 start;
-  int64 end;
-  int64 size;
-  int i;
-  snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
-           name.c_str(), "resource");
-  file = fopen(filename, "r");
-  if (!file) {
-    logprintf(0, "Process Error: impossible to find resource file for %s",
-              filename);
-    return errno;
-  }
-  for (i = 0; i < 6; i++) {
-    if (!fgets(buf, 256, file))
-      break;
-    sscanf(buf, "%llx %llx", &start, &end);  // NOLINT
-    size = 0;
-    if (start)
-      size = end - start + 1;
-    device->base_addr[i] = start;
-    device->size[i] = size;
-  }
-  fclose(file);
-  return 0;
-}
diff --git a/src/os.h b/src/os.h

index a928577b2123ce5f72dfc34a0a5e79f1987d6076..13660d8e7ead63b7ea16ff1ba8e9c84de08d41f7 100644 (file)
--- a/src/os.h
+++ b/src/os.h
@@ -17,6 +17,8 @@
  #define STRESSAPPTEST_OS_H_
  
  #include <dirent.h>
+#include <sys/syscall.h>
+
  #include <string>
  #include <list>
  #include <map>
@@ -26,9 +28,9 @@
  // so these includes are correct.
  #include "adler32memcpy.h"  // NOLINT
  #include "sattypes.h"       // NOLINT
+#include "clock.h"          // NOLINT
  
  const char kPagemapPath[] = "/proc/self/pagemap";
-const char kSysfsPath[] = "/sys/bus/pci/devices";
  
  struct PCIDevice {
    int32 domain;
@@ -45,6 +47,8 @@ typedef vector<PCIDevice*> PCIDevices;
  
  class ErrorDiag;
  
+class Clock;
+
  // This class implements OS/Platform specific funtions.
  class OsLayer {
   public:
@@ -57,6 +61,13 @@ class OsLayer {
      min_hugepages_bytes_ = min_bytes;
    }
  
+  // Set the minimum amount of memory that should not be allocated. This only
+  // has any effect if hugepages are not used.
+  // Must be set before Initialize().
+  void SetReserveSize(int64 reserve_mb) {
+    reserve_mb_ = reserve_mb;
+  }
+
    // Set parameters needed to translate physical address to memory module.
    void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
                              vector< vector<string> > *channels) {
@@ -77,13 +88,11 @@ class OsLayer {
    // Prints failed dimm. This implementation is optional for
    // subclasses to implement.
    // Takes a bus address and string, and prints the DIMM name
-  // into the string. Returns error status.
+  // into the string. Returns the DIMM number that corresponds to the
+  // address given, or -1 if unable to identify the DIMM number.
+  // Note that subclass implementations of FindDimm() MUST fill
+  // buf with at LEAST one non-whitespace character (provided len > 0).
    virtual int FindDimm(uint64 addr, char *buf, int len);
-  // Print dimm info, plus more available info.
-  virtual int FindDimmExtended(uint64 addr, char *buf, int len) {
-    return FindDimm(addr, buf, len);
-  }
-
  
    // Classifies addresses according to "regions"
    // This may mean different things on different platforms.
@@ -141,10 +150,95 @@ class OsLayer {
      // instruction. For example, software can use an MFENCE instruction to
      // insure that previous stores are included in the write-back.
      asm volatile("mfence");
-    asm volatile("clflush (%0)" :: "r" (vaddr));
+    asm volatile("clflush (%0)" : : "r" (vaddr));
+    asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+    #warning "Unsupported CPU type ARMV7A: Using syscall to cache flush."
+    // ARMv7a cachelines are 8 words (32 bytes).
+    syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
+#else
+  #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+  }
+
+  // Fast flush, for use in performance critical code.
+  // This is bound at compile time, and will not pick up
+  // any runtime machine configuration info.  Takes a NULL-terminated
+  // array of addresses to flush.
+  inline static void FastFlushList(void **vaddrs) {
+#ifdef STRESSAPPTEST_CPU_PPC
+    while (*vaddrs) {
+      asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
+    }
+    asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+    // Put mfence before and after clflush to make sure:
+    // 1. The write before the clflush is committed to memory bus;
+    // 2. The read after the clflush is hitting the memory bus.
+    //
+    // From Intel manual:
+    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+    // to be ordered by any other fencing, serializing or other CLFLUSH
+    // instruction. For example, software can use an MFENCE instruction to
+    // insure that previous stores are included in the write-back.
+    asm volatile("mfence");
+    while (*vaddrs) {
+      asm volatile("clflush (%0)" : : "r" (*vaddrs++));
+    }
+    asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+    while (*vaddrs) {
+      FastFlush(*vaddrs++);
+    }
+#else
+    #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+  }
+
+  // Fast flush hint, for use in performance critical code.
+  // This is bound at compile time, and will not pick up
+  // any runtime machine configuration info.  Note that this
+  // will not guarantee that a flush happens, but will at least
+  // hint that it should.  This is useful for speeding up
+  // parallel march algorithms.
+  inline static void FastFlushHint(void *vaddr) {
+#ifdef STRESSAPPTEST_CPU_PPC
+    asm volatile("dcbf 0,%0" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+    // From Intel manual:
+    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+    // to be ordered by any other fencing, serializing or other CLFLUSH
+    // instruction. For example, software can use an MFENCE instruction to
+    // insure that previous stores are included in the write-back.
+    asm volatile("clflush (%0)" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+    FastFlush(vaddr);
+#else
+    #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+  }
+
+  // Fast flush, for use in performance critical code.
+  // This is bound at compile time, and will not pick up
+  // any runtime machine configuration info.  Syncs any
+  // transactions for ordering FastFlushHints.
+  inline static void FastFlushSync() {
+#ifdef STRESSAPPTEST_CPU_PPC
+    asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+    // Issue the mfence that orders any preceding FastFlushHint() clflush:
+    // 1. The write before the clflush is committed to memory bus;
+    // 2. The read after the clflush is hitting the memory bus.
+    //
+    // From Intel manual:
+    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+    // to be ordered by any other fencing, serializing or other CLFLUSH
+    // instruction. For example, software can use an MFENCE instruction to
+    // insure that previous stores are included in the write-back.
      asm volatile("mfence");
  #elif defined(STRESSAPPTEST_CPU_ARMV7A)
-  #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes."
+    // This is a NOP, FastFlushHint() always does a full flush, so there's
+    // nothing to do for FastFlushSync().
  #else
    #warning "Unsupported CPU type: Unable to force cache flushes."
  #endif
@@ -239,9 +333,6 @@ class OsLayer {
    // Handle to platform-specific error diagnoser.
    ErrorDiag *error_diagnoser_;
  
-  // Detect all PCI Devices.
-  virtual PCIDevices GetPCIDevices();
-
    // Disambiguate between different "warm" memcopies.
    virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
                                 unsigned int size_in_bytes,
@@ -258,16 +349,27 @@ class OsLayer {
    }
    ErrCallback get_err_log_callback() { return err_log_callback_; }
  
+  // Set a clock object that can be overridden for use with unit tests.
+  void SetClock(Clock *clock) {
+    if (clock_) {
+      delete clock_;
+    }
+    clock_ = clock;
+    time_initialized_ = clock_->Now();
+  }
+
   protected:
    void *testmem_;                // Location of test memory.
    uint64 testmemsize_;           // Size of test memory.
    int64 totalmemsize_;           // Size of available memory.
    int64 min_hugepages_bytes_;    // Minimum hugepages size.
+  int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
    bool  error_injection_;        // Do error injection?
    bool  normal_mem_;             // Memory DMA capable?
    bool  use_hugepages_;          // Use hugepage shmem?
    bool  use_posix_shm_;          // Use 4k page shmem?
    bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
+  bool  mmapped_allocation_;     // Was memory allocated using mmap()?
    int   shmid_;                  // Handle to shmem
    vector< vector<string> > *channels_;  // Memory module names per channel.
    uint64 channel_hash_;          // Mask of address bits XORed for channel.
@@ -291,9 +393,6 @@ class OsLayer {
  
    // Get file descriptor for dev msr.
    virtual int OpenMSR(uint32 core, uint32 address);
-  // Auxiliary methods for PCI device configuration
-  int PCIGetValue(string name, string object);
-  int PCIGetResources(string name, PCIDevice *device);
  
    // Look up how many hugepages there are.
    virtual int64 FindHugePages();
@@ -301,6 +400,9 @@ class OsLayer {
    // Link to find last transaction at an error location.
    ErrCallback err_log_callback_;
  
+  // Object to wrap the time function.
+  Clock *clock_;
+
   private:
    DISALLOW_COPY_AND_ASSIGN(OsLayer);
  };
diff --git a/src/sat.cc b/src/sat.cc

index 4f4e684db6d6223ce8c43b433653aff7b0373ba2..57fd4fe02fac5b520f90c5e0cb5ce4718527e111 100644 (file)
--- a/src/sat.cc
+++ b/src/sat.cc
@@ -125,6 +125,26 @@ bool Sat::CheckEnvironment() {
    #error Build system regression - COPTS disregarded.
  #endif
  
+  // Check if the cpu frequency test is enabled and able to run.
+  if (cpu_freq_test_) {
+    if (!CpuFreqThread::CanRun()) {
+      logprintf(0, "Process Error: This platform does not support this "
+                "test.\n");
+      bad_status();
+      return false;
+    } else if (cpu_freq_threshold_ <= 0) {
+      logprintf(0, "Process Error: The cpu frequency test requires "
+                "--cpu_freq_threshold set to a value > 0\n");
+      bad_status();
+      return false;
+    } else if (cpu_freq_round_ < 0) {
+      logprintf(0, "Process Error: The --cpu_freq_round option must be greater"
+                " than or equal to zero. A value of zero means no rounding.\n");
+      bad_status();
+      return false;
+    }
+  }
+
    // Use all CPUs if nothing is specified.
    if (memory_threads_ == -1) {
      memory_threads_ = os_->num_cpus();
@@ -491,12 +511,6 @@ bool Sat::InitializePages() {
      if (GetValid(&pe, kInvalidTag)) {
        int64 paddr = os_->VirtualToPhysical(pe.addr);
        int32 region = os_->FindRegion(paddr);
-
-      if (i < 256) {
-        char buf[256];
-        os_->FindDimm(paddr, buf, sizeof(buf));
-        logprintf(12, "Log: address: %#llx, %s\n", paddr, buf);
-      }
        region_[region]++;
        pe.paddr = paddr;
        pe.tag = 1 << region;
@@ -554,6 +568,7 @@ bool Sat::Initialize() {
    // Initializes sync'd log file to ensure output is saved.
    if (!InitializeLogfile())
      return false;
+  Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_);
    Logger::GlobalLogger()->StartThread();
  
    logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str());
@@ -572,6 +587,10 @@ bool Sat::Initialize() {
  
    if (min_hugepages_mbytes_ > 0)
      os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte);
+
+  if (reserve_mb_ > 0)
+    os_->SetReserveSize(reserve_mb_);
+
    if (channels_.size() > 0) {
      logprintf(6, "Log: Decoding memory: %dx%d bit channels,"
          "%d modules per channel (x%d), decoding hash 0x%x\n",
@@ -647,6 +666,7 @@ Sat::Sat() {
    pages_ = 0;
    size_mb_ = 0;
    size_ = size_mb_ * kMegabyte;
+  reserve_mb_ = 0;
    min_hugepages_mbytes_ = 0;
    freepages_ = 0;
    paddr_base_ = 0;
@@ -661,6 +681,7 @@ Sat::Sat() {
    run_on_anything_ = 0;
    use_logfile_ = 0;
    logfile_ = 0;
+  log_timestamps_ = true;
    // Detect 32/64 bit binary.
    void *pvoid = 0;
    address_mode_ = sizeof(pvoid) * 8;
@@ -678,9 +699,15 @@ Sat::Sat() {
    // Cache coherency data initialization.
    cc_test_ = false;         // Flag to trigger cc threads.
    cc_cacheline_count_ = 2;  // Two datastructures of cache line size.
+  cc_cacheline_size_ = 0;   // Size of a cacheline (0 for auto-detect).
    cc_inc_count_ = 1000;     // Number of times to increment the shared variable.
    cc_cacheline_data_ = 0;   // Cache Line size datastructure.
  
+  // Cpu frequency data initialization.
+  cpu_freq_test_ = false;   // Flag to trigger cpu frequency thread.
+  cpu_freq_threshold_ = 0;  // Threshold, in MHz, at which a cpu fails.
+  cpu_freq_round_ = 10;     // Round the computed frequency to this value.
+
    sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL));
    file_threads_ = 0;
    net_threads_ = 0;
@@ -774,6 +801,9 @@ bool Sat::ParseArgs(int argc, char **argv) {
      // Set number of megabyte to use.
      ARG_IVALUE("-M", size_mb_);
  
+    // Specify the number of megabytes to reserve for the system.
+    ARG_IVALUE("--reserve_memory", reserve_mb_);
+
      // Set minimum megabytes of hugepages to require.
      ARG_IVALUE("-H", min_hugepages_mbytes_);
  
@@ -795,8 +825,21 @@ bool Sat::ParseArgs(int argc, char **argv) {
      // Set number of cache line size datastructures
      ARG_IVALUE("--cc_line_count", cc_cacheline_count_);
  
+    // Override the detected or assumed cache line size.
+    ARG_IVALUE("--cc_line_size", cc_cacheline_size_);
+
      // Flag set when cache coherency tests need to be run
-    ARG_KVALUE("--cc_test", cc_test_, 1);
+    ARG_KVALUE("--cc_test", cc_test_, true);
+
+    // Set when the cpu_frequency test needs to be run
+    ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true);
+
+    // Set the threshold in MHz at which the cpu frequency test will fail.
+    ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_);
+
+    // Set the rounding value for the cpu frequency test. The default is to
+    // round to the nearest 10s value.
+    ARG_IVALUE("--cpu_freq_round", cpu_freq_round_);
  
      // Set number of CPU stress threads.
      ARG_IVALUE("-C", cpu_stress_threads_);
@@ -807,6 +850,9 @@ bool Sat::ParseArgs(int argc, char **argv) {
      // Verbosity level.
      ARG_IVALUE("-v", verbosity_);
  
+    // Turn off timestamp logging.
+    ARG_KVALUE("--no_timestamps", log_timestamps_, false);
+
      // Set maximum number of errors to collect. Stop running after this many.
      ARG_IVALUE("--max_errors", max_errorcount_);
  
@@ -1004,7 +1050,7 @@ bool Sat::ParseArgs(int argc, char **argv) {
      for (uint i = 0; i < channels_.size(); i++)
        if (channels_[i].size() != channels_[0].size()) {
          logprintf(6, "Process Error: "
-            "Channels 0 and %d have a different count of dram modules.\n",i);
+            "Channels 0 and %d have a different count of dram modules.\n", i);
          bad_status();
          return false;
        }
@@ -1043,6 +1089,8 @@ bool Sat::ParseArgs(int argc, char **argv) {
  void Sat::PrintHelp() {
    printf("Usage: ./sat(32|64) [options]\n"
           " -M mbytes        megabytes of ram to test\n"
+         " --reserve-memory If not using hugepages, the amount of memory to "
+         " reserve for the system\n"
           " -H mbytes        minimum megabytes of hugepages to require\n"
           " -s seconds       number of seconds to run\n"
           " -m threads       number of memory copy threads to run\n"
@@ -1054,6 +1102,7 @@ void Sat::PrintHelp() {
           " -f filename      add a disk thread with "
           "tempfile 'filename'\n"
           " -l logfile       log output to file 'logfile'\n"
+         " --no_timestamps  do not prefix timestamps to log messages\n"
           " --max_errors n   exit early after finding 'n' errors\n"
           " -v level         verbosity (0-20), default is 8\n"
           " -W               Use more CPU-stressful memory copy\n"
@@ -1091,6 +1140,13 @@ void Sat::PrintHelp() {
           "cacheline's member\n"
           " --cc_line_count  number of cache line sized datastructures "
           "to allocate for the cache coherency threads to operate\n"
+         " --cc_line_size   override the auto-detected cache line size\n"
+         " --cpu_freq_test  enable the cpu frequency test (requires the "
+         "--cpu_freq_threshold argument to be set)\n"
+         " --cpu_freq_threshold  fail the cpu frequency test if the frequency "
+         "goes below this value (specified in MHz)\n"
+         " --cpu_freq_round round the computed frequency to this value, if set"
+         " to zero, only round to the nearest MHz\n"
           " --paddr_base     allocate memory starting from this address\n"
           " --pause_delay    delay (in seconds) between power spikes\n"
           " --pause_duration duration (in seconds) of each pause\n"
@@ -1098,12 +1154,12 @@ void Sat::PrintHelp() {
           "each CPU to be tested by that CPU\n"
           " --remote_numa    choose memory regions not associated with "
           "each CPU to be tested by that CPU\n"
-         " --channel_hash   mask of address bits XORed to determine channel.\n"
-         "                  Mask 0x40 interleaves cachelines between channels\n"
+         " --channel_hash   mask of address bits XORed to determine channel. "
+         "Mask 0x40 interleaves cachelines between channels\n"
           " --channel_width bits     width in bits of each memory channel\n"
-         " --memory_channel u1,u2   defines a comma-separated list of names\n"
-         "                          for dram packages in a memory channel.\n"
-         "                          Use multiple times to define multiple channels.\n");
+         " --memory_channel u1,u2   defines a comma-separated list of names "
+         "for dram packages in a memory channel. Use multiple times to "
+         "define multiple channels.\n");
  }
  
  bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) {
@@ -1348,32 +1404,45 @@ void Sat::InitializeThreads() {
             sizeof(cc_cacheline_data) * cc_cacheline_count_);
  
      int num_cpus = CpuCount();
+    char *num;
+    // Calculate the number of cache lines needed just to give each core
+    // its own counter.
+    int line_size = cc_cacheline_size_;
+    if (line_size <= 0) {
+      line_size = CacheLineSize();
+      if (line_size < kCacheLineSize)
+        line_size = kCacheLineSize;
+      logprintf(12, "Log: Using %d as cache line size\n", line_size);
+    }
+    // The number of cache lines needed to hold an array of num_cpus counters.
+    // "num" must be the same type as cc_cacheline_data[X].num or the memory
+    // size calculations will fail.
+    int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size;
      // Allocate all the nums once so that we get a single chunk
      // of contiguous memory.
-    int *num;
  #ifdef HAVE_POSIX_MEMALIGN
      int err_result = posix_memalign(
          reinterpret_cast<void**>(&num),
-        kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_);
+        line_size, line_size * needed_lines * cc_cacheline_count_);
  #else
-    num = reinterpret_cast<int*>(memalign(kCacheLineSize,
-                       sizeof(*num) * num_cpus * cc_cacheline_count_));
+    num = reinterpret_cast<int*>(memalign(
+        line_size, line_size * needed_lines * cc_cacheline_count_));
      int err_result = (num == 0);
  #endif
      sat_assert(err_result == 0);
  
      int cline;
      for (cline = 0; cline < cc_cacheline_count_; cline++) {
-      memset(num, 0, sizeof(num_cpus) * num_cpus);
+      memset(num, 0, sizeof(*num) * num_cpus);
        cc_cacheline_data_[cline].num = num;
-      num += num_cpus;
+      num += (line_size * needed_lines) / sizeof(*num);
      }
  
      int tnum;
      for (tnum = 0; tnum < num_cpus; tnum++) {
        CpuCacheCoherencyThread *thread =
            new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_,
-                                      tnum, cc_inc_count_);
+                                      tnum, num_cpus, cc_inc_count_);
        thread->InitThread(total_threads_++, this, os_, patternlist_,
                           &continuous_status_);
        // Pin the thread to a particular core.
@@ -1384,6 +1453,22 @@ void Sat::InitializeThreads() {
      }
      workers_map_.insert(make_pair(kCCType, cc_vector));
    }
+
+  if (cpu_freq_test_) {
+    // Create the frequency test thread.
+    logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n",
+              cpu_freq_threshold_);
+    CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_,
+                                              cpu_freq_round_);
+    // This thread should be paused when other threads are paused.
+    thread->InitThread(total_threads_++, this, os_, NULL,
+                       &power_spike_status_);
+
+    WorkerVector *cpu_freq_vector = new WorkerVector();
+    cpu_freq_vector->insert(cpu_freq_vector->end(), thread);
+    workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector));
+  }
+
    ReleaseWorkerLock();
  }
  
@@ -1392,6 +1477,19 @@ int Sat::CpuCount() {
    return sysconf(_SC_NPROCESSORS_CONF);
  }
  
+// Return the worst case (largest) cache line size of the various levels of
+// cache actually present in the machine.
+int Sat::CacheLineSize() {
+  int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+  int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
+  if (linesize > max_linesize) max_linesize = linesize;
+  linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
+  if (linesize > max_linesize) max_linesize = linesize;
+  linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE);
+  if (linesize > max_linesize) max_linesize = linesize;
+  return max_linesize;
+}
+
  // Notify and reap worker threads.
  void Sat::JoinThreads() {
    logprintf(12, "Log: Joining worker threads\n");
@@ -1974,3 +2072,9 @@ void logprintf(int priority, const char *format, ...) {
    Logger::GlobalLogger()->VLogF(priority, format, args);
    va_end(args);
  }
+
+// Stop the logging thread and verify any pending data is written to the log.
+void logstop() {
+  Logger::GlobalLogger()->StopThread();
+}
+
diff --git a/src/sat.h b/src/sat.h

index 93d6b3466c52f15154fb54897473f2e7510b6d5a..92396d86e3496bfd44a6e411e512a1356118bf2b 100644 (file)
--- a/src/sat.h
+++ b/src/sat.h
@@ -134,6 +134,8 @@ class Sat {
  
    // Return the number of cpus in the system.
    int CpuCount();
+  // Return the worst-case (largest) cache line size of the system.
+  int CacheLineSize();
  
    // Collect error counts from threads.
    int64 GetTotalErrorCount();
@@ -147,13 +149,15 @@ class Sat {
    int64 pages_;                       // Number of memory blocks.
    int64 size_;                        // Size of memory tested, in bytes.
    int64 size_mb_;                     // Size of memory tested, in MB.
+  int64 reserve_mb_;                  // Reserve at least this amount of memory
+                                      // for the system, in MB.
    int64 min_hugepages_mbytes_;        // Minimum hugepages size.
    int64 freepages_;                   // How many invalid pages we need.
    int disk_pages_;                    // Number of pages per temp file.
    uint64 paddr_base_;                 // Physical address base.
-  vector< vector<string> > channels_; // Memory module names per channel.
    uint64 channel_hash_;               // Mask of address bits XORed for channel.
    int channel_width_;                 // Channel width in bits.
+  vector< vector<string> > channels_;  // Memory module names per channel.
  
    // Control flags.
    volatile sig_atomic_t user_break_;  // User has signalled early exit.  Used as
@@ -172,6 +176,7 @@ class Sat {
    int use_logfile_;                   // Log to a file.
    char logfilename_[255];             // Name of file to log to.
    int logfile_;                       // File handle to log to.
+  bool log_timestamps_;               // Whether to add timestamps to log lines.
  
    // Disk thread options.
    int read_block_size_;               // Size of block to read from disk.
@@ -202,9 +207,18 @@ class Sat {
    bool cc_test_;                      // Flag to decide whether to start the
                                        // cache coherency threads.
    int cc_cacheline_count_;            // Number of cache line size structures.
+  int cc_cacheline_size_;             // Size of a cache line.
    int cc_inc_count_;                  // Number of times to increment the shared
                                        // cache lines structure members.
  
+  // Cpu Frequency Options.
+  bool cpu_freq_test_;                // Flag to decide whether to start the
+                                      // cpu frequency thread.
+  int cpu_freq_threshold_;            // The MHz threshold which will cause
+                                      // the test to fail.
+  int cpu_freq_round_;                // Round the computed frequency to this
+                                      // value.
+
    // Thread control.
    int file_threads_;                  // Threads of file IO.
    int net_threads_;                   // Threads of network IO.
@@ -252,7 +266,8 @@ class Sat {
      kRandomDiskType = 7,
      kCPUType = 8,
      kErrorType = 9,
-    kCCType = 10
+    kCCType = 10,
+    kCPUFreqType = 11,
    };
  
    // Helper functions.
diff --git a/src/sattypes.h b/src/sattypes.h

index c9341d0b00642ccea3a2d125bce22e864f8e6c8f..e51db318b44b9fe7b1be0cc41cc5a223b568517f 100644 (file)
--- a/src/sattypes.h
+++ b/src/sattypes.h
@@ -27,11 +27,11 @@
  
  #ifdef HAVE_CONFIG_H  // Built using autoconf
  #ifdef __ANDROID__
-#include "stressapptest_config_android.h"
+#include "stressapptest_config_android.h"  // NOLINT
  #else
-#include "stressapptest_config.h"
-using namespace __gnu_cxx;
-#endif
+#include "stressapptest_config.h"  // NOLINT
+using namespace __gnu_cxx;  //NOLINT
+#endif  // __ANDROID__
  using namespace std;
  
  typedef signed long long   int64;
@@ -57,10 +57,10 @@ inline const char* BuildChangelist() {
  }
  
  static const bool kOpenSource = true;
-#else
+#else  // !HAVE_CONFIG_H
  static const bool kOpenSource = false;
-  #include "googlesattypes.h"
-#endif
+  #include "googlesattypes.h"  // NOLINT
+#endif  // HAVE_CONFIG_H
  // Workaround to allow 32/64 bit conversion
  // without running into strict aliasing problems.
  union datacast_t {
@@ -75,11 +75,15 @@ union datacast_t {
  // File sync'd print to console and log
  void logprintf(int priority, const char *format, ...);
  
+// Stop the log and dump any queued lines.
+void logstop();
+
  // We print to stderr ourselves first in case we're in such a bad state that the
  // logger can't work.
  #define sat_assert(x) \
  {\
    if (!(x)) {\
+    logstop();\
      fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
      logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
      exit(1);\
@@ -186,6 +190,46 @@ inline string ErrorString(int error_num) {
  #endif
  }
  
+// Execute the cpuid instruction and pass back the contents of the registers.
+// This only works on x86 based platforms.
+//
+// Parameters:
+//   eax - in/out: on entry holds the value loaded into EAX before cpuid is
+//         executed; on x86 builds it receives EAX afterwards.
+//   ebx, ecx, edx - out only: receive EBX, ECX and EDX on x86 builds.
+//
+// Only EAX is listed as an input to the asm statements below; whatever the
+// caller stored in *ecx is overwritten with 0 and is not loaded into the
+// ECX register before the instruction runs.
+//
+// On builds that are neither I686 nor X86_64, *ebx, *ecx and *edx are left
+// at 0 and *eax is left exactly as the caller passed it in.
+inline void cpuid(
+  unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
+  // Pre-clear the pure outputs so the non-x86 paths below hand back zeros.
+  *ebx = 0;
+  *ecx = 0;
+  *edx = 0;
+  // CPUID features documented at:
+  // http://www.sandpile.org/ia32/cpuid.htm
+#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64)
+#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+  // In PIC compilations using the i686 cpu type, ebx contains the address
+  // of the global offset table. The compiler can't properly handle constraints
+  // using the ebx register for this compile, so preserve the register
+  // ourselves.
+  //
+  // Sequence: stash ebx in edi, run cpuid, then swap edi and ebx so that
+  // ebx holds its original value again and edi holds cpuid's EBX result.
+  // The "=D" output constraint then reads that result out of edi.
+  asm(
+    "mov %%ebx, %%edi;"
+    "cpuid;"
+    "xchg %%edi, %%ebx;"
+    // Output registers.
+    : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx)
+    // Input registers.
+    : "a" (*eax)
+  );  // Asm
+#else
+  // No register juggling needed here: all four registers are bound
+  // directly through the a/b/c/d constraints.
+  asm(
+    "cpuid;"
+    // Output registers.
+    : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+    // Input registers.
+    : "a" (*eax)
+  );  // Asm
+#endif  // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+#elif defined(STRESSAPPTEST_CPU_PPC)
+  // Nothing to execute on PPC; the zeroed outputs above are the result.
+  return;
+#else
+  // Any other CPU type also falls through with the zeroed outputs, but
+  // additionally emits a compile-time warning.
+#warning "Unsupported CPU type."
+#endif
+}
+
  // Define handy constants here
  static const int kTicksPerSec = 100;
  static const int kMegabyte = (1024LL*1024LL);
diff --git a/src/worker.cc b/src/worker.cc

index d24b5cd2c077b30881385871bd61d7bbced29e6b..dcffd4e77893df93d0aaa60d167e7c093ea0b272 100644 (file)
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -78,31 +78,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid,
  #endif
  
  namespace {
-  // Get HW core ID from cpuid instruction.
-  inline int apicid(void) {
-    int cpu;
-#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
-    __asm__ __volatile__ (
-# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__)
-        "xchg %%ebx, %%esi;"
-        "cpuid;"
-        "xchg %%esi, %%ebx;"
-        : "=S" (cpu)
-# else
-        "cpuid;"
-        : "=b" (cpu)
-# endif
-        : "a" (1) : "cx", "dx");
-#elif defined(STRESSAPPTEST_CPU_ARMV7A)
-  #warning "Unsupported CPU type ARMV7A: unable to determine core ID."
-    cpu = 0;
-#else
-  #warning "Unsupported CPU type: unable to determine core ID."
-    cpu = 0;
-#endif
-    return (cpu >> 24);
-  }
-
    // Work around the sad fact that there are two (gnu, xsi) incompatible
    // versions of strerror_r floating around google. Awesome.
    bool sat_strerror(int err, char *buf, int len) {
@@ -124,7 +99,7 @@ namespace {
    inline uint64 addr_to_tag(void *address) {
      return reinterpret_cast<uint64>(address);
    }
-}
+}  // namespace
  
  #if !defined(O_DIRECT)
  // Sometimes this isn't available.
@@ -183,10 +158,13 @@ void WorkerStatus::StopWorkers() {
      WaitOnPauseBarrier();
  }
  
-bool WorkerStatus::ContinueRunning() {
+bool WorkerStatus::ContinueRunning(bool *paused) {
    // This loop is an optimization.  We use it to immediately re-check the status
    // after resuming from a pause, instead of returning and waiting for the next
    // call to this function.
+  if (paused) {
+    *paused = false;
+  }
    for (;;) {
      switch (GetStatus()) {
        case RUN:
@@ -197,6 +175,10 @@ bool WorkerStatus::ContinueRunning() {
          WaitOnPauseBarrier();
          // Wait for ResumeWorkers() to be called.
          WaitOnPauseBarrier();
+        // Indicate that a pause occurred.
+        if (paused) {
+          *paused = true;
+        }
          break;
        case STOP:
          return false;
@@ -325,8 +307,8 @@ bool WorkerThread::InitPriority() {
      logprintf(11, "Log: Bind to %s failed.\n",
                cpuset_format(&cpu_mask_).c_str());
  
-  logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n",
-            thread_num_, apicid(),
+  logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n",
+            thread_num_, sched_getcpu(),
              CurrentCpusFormat().c_str(),
              cpuset_format(&cpu_mask_).c_str());
  #if 0
@@ -590,7 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
                                  const char *message) {
    char dimm_string[256] = "";
  
-  int apic_id = apicid();
+  int core_id = sched_getcpu();
  
    // Determine if this is a write or read error.
    os_->Flush(error->vaddr);
@@ -625,7 +607,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
                "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
                "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
                message,
-              apic_id,
+              core_id,
                CurrentCpusFormat().c_str(),
                error->vaddr,
                error->paddr,
@@ -825,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr,
        if ((state == kGoodAgain) || (state == kBad)) {
          unsigned int blockerrors = badend - badstart + 1;
          errormessage = "Block Error";
+        // It's okay for the 1st entry to be corrected multiple times,
+        // it will simply be reported twice. Once here and once below
+        // when processing the error queue.
          ProcessError(&recorded[0], 0, errormessage.c_str());
          logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
                    "%d bytes from offset 0x%x to 0x%x\n",
@@ -833,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr,
                    blockerrors * wordsize_,
                    offset + badstart * wordsize_,
                    offset + badend * wordsize_);
-        errorcount_ += blockerrors;
-        return blockerrors;
        }
      }
    }
@@ -850,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr,
  
    if (page_error) {
      // For each word in the data region.
-    int error_recount = 0;
      for (int i = 0; i < length / wordsize_; i++) {
        uint64 actual = memblock[i];
        uint64 expected;
@@ -869,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr,
  
        // If the value is incorrect, save an error record for later printing.
        if (actual != expected) {
-        if (error_recount < kErrorLimit) {
-          // We already reported these.
-          error_recount++;
-        } else {
-          // If we have overflowed the error queue, print the errors now.
-          struct ErrorRecord er;
-          er.actual = actual;
-          er.expected = expected;
-          er.vaddr = &memblock[i];
-
-          // Do the error printout. This will take a long time and
-          // likely change the machine state.
-          ProcessError(&er, 12, errormessage.c_str());
-          overflowerrors++;
-        }
+        // If we have overflowed the error queue, print the errors now.
+        struct ErrorRecord er;
+        er.actual = actual;
+        er.expected = expected;
+        er.vaddr = &memblock[i];
+
+        // Do the error printout. This will take a long time and
+        // likely change the machine state.
+        ProcessError(&er, 12, errormessage.c_str());
+        overflowerrors++;
        }
      }
    }
@@ -958,7 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
    char tag_dimm_string[256] = "";
    bool read_error = false;
  
-  int apic_id = apicid();
+  int core_id = sched_getcpu();
  
    // Determine if this is a write or read error.
    os_->Flush(error->vaddr);
@@ -992,7 +969,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
                error->tagvaddr, error->tagpaddr,
                tag_dimm_string,
                read_error ? "read error" : "write error",
-              apic_id,
+              core_id,
                CurrentCpusFormat().c_str(),
                error->vaddr,
                error->paddr,
@@ -1110,12 +1087,18 @@ bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
    AdlerChecksum ignored_checksum;
    os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);
  
-  // Force cache flush.
-  int length = size_in_bytes / sizeof(*dstmem64);
-  for (int i = 0; i < length; i += sizeof(*dstmem64)) {
-    os_->FastFlush(dstmem64 + i);
-    os_->FastFlush(srcmem64 + i);
+  // Force cache flush of both the source and destination addresses.
+  //  length - length of block to flush in cachelines.
+  //  mem_increment - number of dstmem/srcmem values per cacheline.
+  int length = size_in_bytes / kCacheLineSize;
+  int mem_increment = kCacheLineSize / sizeof(*dstmem64);
+  OsLayer::FastFlushSync();
+  for (int i = 0; i < length; ++i) {
+    OsLayer::FastFlushHint(dstmem64 + (i * mem_increment));
+    OsLayer::FastFlushHint(srcmem64 + (i * mem_increment));
    }
+  OsLayer::FastFlushSync();
+
    // Check results.
    AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
    // Patch up address tags.
@@ -1246,11 +1229,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
                                     blocksize,
                                     currentblock * blocksize, 0);
            if (errorcount == 0) {
-            int apic_id = apicid();
+            int core_id = sched_getcpu();
              logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
                           "CRC mismatch %s != %s, "
                           "but no miscompares found on second pass.\n",
-                      apic_id, CurrentCpusFormat().c_str(),
+                      core_id, CurrentCpusFormat().c_str(),
                        crc.ToHexString().c_str(),
                        expectedcrc->ToHexString().c_str());
              struct ErrorRecord er;
@@ -1390,11 +1373,11 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                     blocksize,
                                     currentblock * blocksize, 0);
            if (errorcount == 0) {
-            int apic_id = apicid();
+            int core_id = sched_getcpu();
              logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage "
                           "CRC mismatch %s != %s, "
                           "but no miscompares found on second pass.\n",
-                      apic_id, CurrentCpusFormat().c_str(),
+                      core_id, CurrentCpusFormat().c_str(),
                        crc.ToHexString().c_str(),
                        expectedcrc->ToHexString().c_str());
              struct ErrorRecord er;
@@ -1610,12 +1593,11 @@ void FileThread::SetFile(const char *filename_init) {
  
  // Open the file for access.
  bool FileThread::OpenFile(int *pfile) {
-  bool no_O_DIRECT = false;
    int flags = O_RDWR | O_CREAT | O_SYNC;
    int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
    if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
-    no_O_DIRECT = true;
-    fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT
+    fd = open(filename_.c_str(), flags, 0644);  // Try without O_DIRECT
+    os_->ActivateFlushPageCache();  // Not using O_DIRECT fixed EINVAL
    }
    if (fd < 0) {
      logprintf(0, "Process Error: Failed to create file %s!!\n",
@@ -1623,8 +1605,6 @@ bool FileThread::OpenFile(int *pfile) {
      pages_copied_ = 0;
      return false;
    }
-  if (no_O_DIRECT)
-    os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL
    *pfile = fd;
    return true;
  }
@@ -1695,7 +1675,7 @@ bool FileThread::WritePages(int fd) {
      if (!result)
        return false;
    }
-  return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP.
+  return os_->FlushPageCache();  // If O_DIRECT worked, this will be a NOP.
  }
  
  // Copy data from file into memory block.
@@ -2475,13 +2455,22 @@ bool CpuStressThread::Work() {
  CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
                                                   int cacheline_count,
                                                   int thread_num,
+                                                 int thread_count,
                                                   int inc_count) {
    cc_cacheline_data_ = data;
    cc_cacheline_count_ = cacheline_count;
    cc_thread_num_ = thread_num;
+  cc_thread_count_ = thread_count;
    cc_inc_count_ = inc_count;
  }
  
+// A very simple pseudorandom generator: one step of a right-shifting
+// feedback register.  The seed is shifted right by one bit and, when the bit
+// shifted out was set, kRandomPolynomial is XORed into the result.  Only a
+// few register operations are involved, so the compiler can inline it.
+uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) {
+  const uint64 shifted = seed >> 1;
+  // Apply the feedback polynomial only when the low bit was set.
+  if (seed & 1) {
+    return shifted ^ kRandomPolynomial;
+  }
+  return shifted;
+}
+
  // Worked thread to test the cache coherency of the CPUs
  // Return false on fatal sw error.
  bool CpuCacheCoherencyThread::Work() {
@@ -2490,7 +2479,19 @@ bool CpuCacheCoherencyThread::Work() {
    uint64 time_start, time_end;
    struct timeval tv;
  
+  // Use a slightly more robust random number for the initial
+  // value, so the random sequences from the simple generator will
+  // be more divergent.
+#ifdef HAVE_RAND_R
    unsigned int seed = static_cast<unsigned int>(gettid());
+  uint64 r = static_cast<uint64>(rand_r(&seed));
+  r |= static_cast<uint64>(rand_r(&seed)) << 32;
+#else
+  srand(time(NULL));
+  uint64 r = static_cast<uint64>(rand());  // NOLINT
+  r |= static_cast<uint64>(rand()) << 32;  // NOLINT
+#endif
+
    gettimeofday(&tv, NULL);  // Get the timestamp before increments.
    time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
  
@@ -2500,14 +2501,19 @@ bool CpuCacheCoherencyThread::Work() {
        // Choose a datastructure in random and increment the appropriate
        // member in that according to the offset (which is the same as the
        // thread number.
-#ifdef HAVE_RAND_R
-      int r = rand_r(&seed);
-#else
-      int r = rand();
-#endif
-      r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
+      r = SimpleRandom(r);
+      int cline_num = r % cc_cacheline_count_;
+      int offset;
+      // Reverse the order for odd numbered threads in odd numbered cache
+      // lines.  This is designed for massively multi-core systems where the
+      // number of cores exceeds the bytes in a cache line, so "distant" cores
+      // get a chance to exercise cache coherency between them.
+      if (cline_num & cc_thread_num_ & 1)
+        offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+      else
+        offset = cc_thread_num_;
        // Increment the member of the randomely selected structure.
-      (cc_cacheline_data_[r].num[cc_thread_num_])++;
+      (cc_cacheline_data_[cline_num].num[offset])++;
      }
  
      total_inc += cc_inc_count_;
@@ -2516,14 +2522,26 @@ bool CpuCacheCoherencyThread::Work() {
      // in all the cache line structures for this particular thread.
      int cc_global_num = 0;
      for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
-      cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
+      int offset;
+      // Perform the same offset calculation from above.
+      if (cline_num & cc_thread_num_ & 1)
+        offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+      else
+        offset = cc_thread_num_;
+      cc_global_num += cc_cacheline_data_[cline_num].num[offset];
        // Reset the cachline member's value for the next run.
-      cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
+      cc_cacheline_data_[cline_num].num[offset] = 0;
      }
      if (sat_->error_injection())
        cc_global_num = -1;
  
-    if (cc_global_num != cc_inc_count_) {
+    // Since the count is only stored in a byte, to squeeze more into a
+    // single cache line, only compare it as a byte.  In the event that there
+    // is something detected, the chance that it would be missed by a single
+    // thread is 1 in 256.  If it affects all cores, that makes the chance
+    // of it being missed terribly minute.  It seems unlikely any failure
+    // case would be off by more than a small number.
+    if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) {
        errorcount_++;
        logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
                  cc_global_num, cc_inc_count_);
@@ -2707,20 +2725,17 @@ bool DiskThread::SetParameters(int read_block_size,
  
  // Open a device, return false on failure.
  bool DiskThread::OpenDevice(int *pfile) {
-  bool no_O_DIRECT = false;
    int flags = O_RDWR | O_SYNC | O_LARGEFILE;
    int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
    if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
-    no_O_DIRECT = true;
-    fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
+    fd = open(device_name_.c_str(), flags, 0);  // Try without O_DIRECT
+    os_->ActivateFlushPageCache();
    }
    if (fd < 0) {
      logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
                device_name_.c_str(), thread_num_);
      return false;
    }
-  if (no_O_DIRECT)
-    os_->ActivateFlushPageCache();
    *pfile = fd;
  
    return GetDiskSize(fd);
@@ -2876,11 +2891,11 @@ bool DiskThread::DoWork(int fd) {
  
        // Block is either initialized by writing, or in nondestructive case,
        // initialized by being added into the datastructure for later reading.
-      block->SetBlockAsInitialized();
+      block->initialized();
  
        in_flight_sectors_.push(block);
      }
-    if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
+    if (!os_->FlushPageCache())  // If O_DIRECT worked, this will be a NOP.
        return false;
  
      // Verify blocks on disk.
@@ -2989,8 +3004,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
      errorcount_++;
      os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
  
-    if (event.res < 0) {
-      switch (event.res) {
+    int64 result = static_cast<int64>(event.res);
+    if (result < 0) {
+      switch (result) {
          case -EIO:
            logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
                         "sectors starting at %lld on disk %s (thread %d).\n",
@@ -3013,7 +3029,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
    }
  
    return true;
-#else // !HAVE_LIBAIO_H
+#else  // !HAVE_LIBAIO_H
    return false;
  #endif
  }
@@ -3021,7 +3037,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
  // Write a block to disk.
  // Return false if the block is not written.
  bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
-  memset(block_buffer_, 0, block->GetSize());
+  memset(block_buffer_, 0, block->size());
  
    // Fill block buffer with a pattern
    struct page_entry pe;
@@ -3029,30 +3045,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
      // Even though a valid page could not be obatined, it is not an error
      // since we can always fill in a pattern directly, albeit slower.
      unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
-    block->SetPattern(patternlist_->GetRandomPattern());
+    block->set_pattern(patternlist_->GetRandomPattern());
  
      logprintf(11, "Log: Warning, using pattern fill fallback in "
                    "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
                device_name_.c_str(), thread_num_);
  
-    for (int i = 0; i < block->GetSize()/wordsize_; i++) {
-      memblock[i] = block->GetPattern()->pattern(i);
+    for (unsigned int i = 0; i < block->size()/wordsize_; i++) {
+      memblock[i] = block->pattern()->pattern(i);
      }
    } else {
-    memcpy(block_buffer_, pe.addr, block->GetSize());
-    block->SetPattern(pe.pattern);
+    memcpy(block_buffer_, pe.addr, block->size());
+    block->set_pattern(pe.pattern);
      sat_->PutValid(&pe);
    }
  
    logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
              " (thread %d).\n",
-            block->GetSize()/kSectorSize, block->GetAddress(),
+            block->size()/kSectorSize, block->address(),
              device_name_.c_str(), thread_num_);
  
    int64 start_time = GetTime();
  
-  if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
-                   block->GetAddress() * kSectorSize, write_timeout_)) {
+  if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(),
+                   block->address() * kSectorSize, write_timeout_)) {
      return false;
    }
  
@@ -3073,11 +3089,11 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
  // Return true if the block was read, also increment errorcount
  // if the block had data errors or performance problems.
  bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
-  int64 blocks = block->GetSize() / read_block_size_;
+  int64 blocks = block->size() / read_block_size_;
    int64 bytes_read = 0;
    int64 current_blocks;
    int64 current_bytes;
-  uint64 address = block->GetAddress();
+  uint64 address = block->address();
  
    logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
              "(thread %d).\n",
@@ -3129,7 +3145,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
      // In non-destructive mode, don't compare the block to the pattern since
      // the block was never written to disk in the first place.
      if (!non_destructive_) {
-      if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
+      if (CheckRegion(block_buffer_, block->pattern(), current_bytes,
                        0, bytes_read)) {
          os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
          errorcount_ += 1;
@@ -3166,7 +3182,7 @@ bool DiskThread::Work() {
    // when using direct IO.
  #ifdef HAVE_POSIX_MEMALIGN
    int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
-                              sat_->page_length());
+                                       sat_->page_length());
  #else
    block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
    int memalign_result = (block_buffer_ == 0);
@@ -3410,3 +3426,224 @@ bool MemoryRegionThread::Work() {
              "pages checked\n", thread_num_, status_, pages_copied_);
    return result;
  }
+
+// The list of MSRs to read from each cpu.
+// Each entry pairs an MSR address with a short label.  The table is indexed
+// with the same index that is used for CpuDataType::msrs[] (entry i supplies
+// the address whose value is stored in msrs[i]), so the order of the entries
+// must stay in step with the kMsrTsc/kMsrAperf/kMsrMperf indices.
+const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = {
+  { kMsrTscAddr, "TSC" },
+  { kMsrAperfAddr, "APERF" },
+  { kMsrMperfAddr, "MPERF" },
+};
+
+// Set up a cpu frequency thread.
+//   num_cpus       - number of cpu slots to track.
+//   freq_threshold - a cpu whose computed frequency (MHz) is below this
+//                    value is reported as an error.
+//   round          - granularity (MHz) the computed frequency is rounded to;
+//                    must not be negative, and 0 means "nearest MHz".
+CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round)
+  : num_cpus_(num_cpus),
+    freq_threshold_(freq_threshold),
+    round_(round) {
+  sat_assert(round >= 0);
+
+  // With rounding switched off, fall back to a granularity of 1 MHz.
+  const bool rounding_off = (round == 0);
+  if (rounding_off) {
+    round_ = 1;
+  }
+  // Half of the granularity is added before truncation to round to nearest.
+  round_value_ = rounding_off ? 0.5 : round / 2.0;
+}
+
+// Nothing to release.
+CpuFreqThread::~CpuFreqThread() {
+}
+
+// Compute the difference between the currently read MSR values and the
+// previously read values and store the results in delta.
+//
+//   current  - the most recent sample (MSR values plus timestamp).
+//   previous - the sample taken before it.
+//   delta    - receives current - previous for every MSR and for the
+//              timestamp.
+//
+// Returns false if any MSR value went backwards (an unchanged value is
+// accepted), or if the TSC advanced by less than one million cycles.
+// Otherwise, returns true.  On a false return delta may be only partially
+// filled in and must not be used.
+bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                                 CpuDataType *delta) {
+  // Loop through the msrs.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (previous->msrs[msr] > current->msrs[msr]) {
+      // Pass the register's label, not the whole table entry, to %s.
+      // NOTE(review): assumes the string member of CpuRegisterType is
+      // called `name` - confirm against the declaration in worker.h.
+      logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx "
+                "skipping interval\n", kCpuRegisters[msr].name,
+                previous->msrs[msr], current->msrs[msr]);
+      return false;
+    } else {
+      delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr];
+    }
+  }
+
+  // Check for TSC < 1 Mcycles over interval.
+  if (delta->msrs[kMsrTsc] < (1000 * 1000)) {
+    logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n");
+    return false;
+  }
+  // Elapsed wall-clock time between the two samples.
+  timersub(&current->tv, &previous->tv, &delta->tv);
+
+  return true;
+}
+
+// Compute the change in values of the MSRs between current and previous,
+// set the frequency in MHz of the cpu.
+//
+//   current  - the most recent sample.
+//   previous - the sample taken before it.
+//   freq     - receives the computed frequency, rounded to a multiple of
+//              round_; only written when true is returned.
+//
+// The frequency is (TSC delta / 1e6) * (APERF delta / MPERF delta) divided
+// by the elapsed time in seconds.
+//
+// If there is an error computing the delta, or the delta cannot be turned
+// into a meaningful frequency (MPERF did not advance, or the elapsed time is
+// not positive), return false. Otherwise, return true.
+bool CpuFreqThread::ComputeFrequency(CpuDataType *current,
+                                     CpuDataType *previous, int *freq) {
+  CpuDataType delta;
+  if (!ComputeDelta(current, previous, &delta)) {
+    return false;
+  }
+
+  // MPERF is a divisor below. A zero delta would produce inf/NaN, and
+  // converting that to int is undefined.
+  if (delta.msrs[kMsrMperf] == 0) {
+    logprintf(0, "Log: MPERF did not advance, skipping interval\n");
+    return false;
+  }
+
+  // The timestamps come from the wall clock, so the elapsed time can be zero
+  // or negative if the clock was adjusted between the two samples.
+  double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0;
+  if (interval <= 0.0) {
+    logprintf(0, "Log: Non-positive time interval, skipping interval\n");
+    return false;
+  }
+
+  double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000
+                     * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval;
+
+  // Use the rounding value to round up properly.
+  int computed = static_cast<int>(frequency + round_value_);
+  *freq = computed - (computed % round_);
+  return true;
+}
+
+// This is the task function that the thread executes.
+//
+// Repeatedly samples the MSRs of every available cpu, and once enough
+// consecutive samples exist, computes each cpu's frequency from the two most
+// recent samples and compares it against freq_threshold_.
+//
+// Returns false if the set of available cpus cannot be determined, or if any
+// cpu was ever measured below the threshold; otherwise returns true once
+// IsReadyToRun() reports that the thread should stop.
+bool CpuFreqThread::Work() {
+  cpu_set_t cpuset;
+  if (!AvailableCpus(&cpuset)) {
+    logprintf(0, "Process Error: Cannot get information about the cpus.\n");
+    return false;
+  }
+
+  // Start off indicating the test is passing.
+  status_ = true;
+
+  // curr and prev select which of the two sample buffers below holds the
+  // newest and the previous sample respectively.
+  int curr = 0;
+  int prev = 1;
+  // Number of consecutive good samples since the last reset.
+  uint32 num_intervals = 0;
+  bool paused = false;
+  bool valid;
+  bool pass = true;
+
+  // Two per-cpu sample buffers, indexed by cpu number.
+  vector<CpuDataType> data[2];
+  data[0].resize(num_cpus_);
+  data[1].resize(num_cpus_);
+  while (IsReadyToRun(&paused)) {
+    if (paused) {
+      // Reset the intervals and restart logic after the pause.
+      num_intervals = 0;
+    }
+    if (num_intervals == 0) {
+      // If this is the first interval, then always wait a bit before
+      // starting to collect data.
+      sat_sleep(kStartupDelay);
+    }
+
+    // Get the per cpu counters.
+    valid = true;
+    for (int cpu = 0; cpu < num_cpus_; cpu++) {
+      if (CPU_ISSET(cpu, &cpuset)) {
+        if (!GetMsrs(cpu, &data[curr][cpu])) {
+          logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
+          valid = false;
+          break;
+        }
+      }
+    }
+    if (!valid) {
+      // Reset the number of collected intervals since something bad happened.
+      // Note that this skips the interval pause and the buffer swap at the
+      // bottom of the loop; the next pass starts with the startup delay.
+      num_intervals = 0;
+      continue;
+    }
+
+    num_intervals++;
+
+    // Only compute a delta once more than two good samples have been
+    // collected since the last reset, i.e. starting with the third sample.
+    if (num_intervals > 2) {
+      for (int cpu = 0; cpu < num_cpus_; cpu++) {
+        if (CPU_ISSET(cpu, &cpuset)) {
+          int freq;
+          if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
+                                &freq)) {
+            // Reset the number of collected intervals since an unknown
+            // error occurred.
+            logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
+            num_intervals = 0;
+            // The remaining cpus are not checked for this interval.
+            break;
+          }
+          logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
+          if (freq < freq_threshold_) {
+            // A slow cpu is counted as an error and fails the whole test,
+            // but sampling carries on.
+            errorcount_++;
+            pass = false;
+            logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
+                      "threshold %d MHz.\n", cpu, freq, freq_threshold_);
+          }
+        }
+      }
+    }
+
+    sat_sleep(kIntervalPause);
+
+    // Swap the values in curr and prev (these values flip between 0 and 1).
+    curr ^= 1;
+    prev ^= 1;
+  }
+
+  return pass;
+}
+
+
+// Read every MSR listed in kCpuRegisters for the given cpu into data->msrs
+// and stamp data->tv with the time the read finished.
+//   cpu  - the cpu number whose MSRs are read.
+//   data - receives the MSR values and the timestamp.
+// Returns false as soon as one register cannot be read (the timestamp is
+// not updated in that case). Otherwise, returns true.
+bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
+  int index = 0;
+  while (index < kMsrLast) {
+    // Bail out on the first register that cannot be read.
+    if (!os_->ReadMSR(cpu, kCpuRegisters[index].msr, &data->msrs[index])) {
+      return false;
+    }
+    ++index;
+  }
+
+  // Record when this set of values was acquired.
+  gettimeofday(&data->tv, NULL);
+  return true;
+}
+
+// Returns true if this test can run on the current machine. Otherwise,
+// returns false.
+//
+// On x86 builds the cpu must report, via cpuid: TSC and MSR support,
+// extended function level 0x80000007 or higher, a non-stop TSC, and the
+// APERF/MPERF MSRs. Each failed check logs a "Process Error" line naming
+// the missing feature. On all other builds the test is never runnable.
+bool CpuFreqThread::CanRun() {
+#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+  unsigned int eax, ebx, ecx, edx;
+
+  // Check that the TSC and MSR features are supported.
+  // CPUID.EAX=0x1: EDX.bit4 is TSC, EDX.bit5 is MSR (RDMSR/WRMSR).
+  // This check is valid for both Intel and AMD.
+  eax = 1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (!(edx & (1 << 4))) {
+    logprintf(0, "Process Error: No TSC support.\n");
+    return false;
+  }
+  // The test reads its counters through MSRs, so those are required too.
+  if (!(edx & (1 << 5))) {
+    logprintf(0, "Process Error: No MSR support.\n");
+    return false;
+  }
+
+  // Check the highest extended function level supported.
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000000;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x80000007) {
+    logprintf(0, "Process Error: No invariant TSC support.\n");
+    return false;
+  }
+
+  // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000007;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((edx & (1 << 8)) == 0) {
+    logprintf(0, "Process Error: No non-stop TSC support.\n");
+    return false;
+  }
+
+  // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+  // This check is valid for both Intel and AMD.
+  eax = 0x6;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((ecx & 1) == 0) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+  return true;
+#else
+  logprintf(0, "Process Error: "
+               "cpu_freq_test is only supported on X86 processors.\n");
+  return false;
+#endif
+}
diff --git a/src/worker.h b/src/worker.h

index 31e02258c9d0be4f05ed7b5d9ddbf96e1ef3fe4f..6f9fde7d1f9ffd6b2c9796b851946d1c963cbfab 100644 (file)
--- a/src/worker.h
+++ b/src/worker.h
@@ -44,7 +44,7 @@
  
  // Global Datastruture shared by the Cache Coherency Worker Threads.
  struct cc_cacheline_data {
-  int *num;
+  char *num;
  };
  
  // Typical usage:
@@ -127,10 +127,8 @@ class WorkerStatus {
    // ResumeWorkers() or StopWorkers() has been called.  Number of distinct
    // calling threads must match the worker count (see AddWorkers() and
    // RemoveSelf()).
-  bool ContinueRunning();
+  bool ContinueRunning(bool *paused);
  
-  // TODO(matthewb): Is this functionality really necessary?  Remove it if not.
-  //
    // This is a hack!  It's like ContinueRunning(), except it won't pause.  If
    // any worker threads use this exclusively in place of ContinueRunning() then
    // PauseWorkers() should never be used!
@@ -304,9 +302,10 @@ class WorkerThread {
    //   do {
    //     // work.
    //   } while (IsReadyToRun());
-  virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); }
-  // TODO(matthewb): Is this function really necessary? Remove it if not.
-  //
+  virtual bool IsReadyToRun(bool *paused = NULL) {
+    // Defer to the shared worker status; |paused| is forwarded untouched.
+    const bool keep_running = worker_status_->ContinueRunning(paused);
+    return keep_running;
+  }
+
    // Like IsReadyToRun(), except it won't pause.
    virtual bool IsReadyToRunNoPause() {
      return worker_status_->ContinueRunningNoPause();
@@ -641,16 +640,27 @@ class CpuCacheCoherencyThread : public WorkerThread {
    CpuCacheCoherencyThread(cc_cacheline_data *cc_data,
                            int cc_cacheline_count_,
                            int cc_thread_num_,
+                          int cc_thread_count_,
                            int cc_inc_count_);
    virtual bool Work();
  
   protected:
+  // Used by the simple random number generator as a shift feedback;
+  // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a
+  // pseudorandom cycle of period 2^64-1.
+  static const uint64 kRandomPolynomial = 0xD800000000000000ULL;
+  // A very simple pseudorandom generator that can be inlined and use
+  // registers, to keep the CC test loop tight and focused.
+  static uint64 SimpleRandom(uint64 seed);
+
    cc_cacheline_data *cc_cacheline_data_;  // Datstructure for each cacheline.
    int cc_local_num_;        // Local counter for each thread.
    int cc_cacheline_count_;  // Number of cache lines to operate on.
    int cc_thread_num_;       // The integer id of the thread which is
                              // used as an index into the integer array
                              // of the cacheline datastructure.
+  int cc_thread_count_;     // Total number of threads being run, for
+                            // calculations mixing up cache line access.
    int cc_inc_count_;        // Number of times to increment the counter.
  
   private:
@@ -809,4 +819,80 @@ class MemoryRegionThread : public WorkerThread {
    DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread);
  };
  
+// Worker thread that verifies the frequency of every cpu stays at or above
+// a given threshold.
+class CpuFreqThread : public WorkerThread {
+ public:
+  CpuFreqThread(int num_cpus, int freq_threshold, int round);
+  ~CpuFreqThread();
+
+  // The task function executed by this thread.
+  virtual bool Work();
+
+  // Reports whether the current machine is able to run this test: true if
+  // it can, false if it cannot.
+  static bool CanRun();
+
+ private:
+  // Number of seconds to pause between acquisitions of the MSR data.
+  static const int kIntervalPause = 10;
+  // Number of seconds to wait before acquiring MSR data.
+  static const int kStartupDelay = 5;
+  // Address of the TSC MSR.
+  static const int kMsrTscAddr = 0x10;
+  // Address of the APERF MSR.
+  static const int kMsrAperfAddr = 0xE8;
+  // Address of the MPERF MSR.
+  static const int kMsrMperfAddr = 0xE7;
+
+  // Index values into the CpuDataType.msrs[] array.
+  enum MsrValues {
+    kMsrTsc = 0,    // TSC is stored at index 0.
+    kMsrAperf = 1,  // APERF is stored at index 1.
+    kMsrMperf = 2,  // MPERF is stored at index 2.
+    kMsrLast,       // One past the final index; sizes the MSR arrays.
+  };
+
+  // Describes a single MSR.
+  struct CpuRegisterType {
+    uint32 msr;        // Address of the MSR.
+    const char *name;  // Human readable name of the MSR.
+  };
+
+  // One sample of MSR values for a cpu.
+  struct CpuDataType {
+    uint64 msrs[kMsrLast];  // MSR values, indexed by MsrValues.
+    struct timeval tv;      // Time at which the MSRs were read.
+  };
+
+  // Table of MSR addresses and register names.
+  static const CpuRegisterType kCpuRegisters[kMsrLast];
+
+  // Computes how much the MSR values changed between previous and current
+  // and sets frequency to the cpu frequency in MHz. Returns false if there
+  // is an error computing the delta. Otherwise, returns true.
+  bool ComputeFrequency(CpuDataType *current, CpuDataType *previous,
+                        int *frequency);
+
+  // Reads the MSR values of the given cpu and stores them in data. Returns
+  // false if any error is encountered. Otherwise, returns true.
+  bool GetMsrs(int cpu, CpuDataType *data);
+
+  // Stores in delta the difference between the currently read MSR values
+  // and the previously read ones. Returns false if any of the values did
+  // not increase, or if the TSC value is too small. Otherwise, returns true.
+  bool ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                    CpuDataType *delta);
+
+  // Total number of cpus on the system.
+  int num_cpus_;
+
+  // Minimum frequency (in MHz) that each cpu must operate at.
+  int freq_threshold_;
+
+  // Value the computed frequency is rounded to.
+  int round_;
+
+  // Precomputed value added to the frequency to perform the rounding.
+  double round_value_;
+
+  DISALLOW_COPY_AND_ASSIGN(CpuFreqThread);
+};
+
  #endif  // STRESSAPPTEST_WORKER_H_
diff --git a/stressapptest.1 b/stressapptest.1

index 695f9ee583fceea08e2df4b230c8ea840a1bf25d..2c914785509c17ff24b47e4158cef409306f6e5d 100644 (file)
--- a/stressapptest.1
+++ b/stressapptest.1
@@ -86,9 +86,14 @@ Number of times to increment the cacheline's member.
  
  .TP
  .B \-\-cc_line_count <number>
-Mumber of cache line sized datastructures to allocate for the cache coherency
+Number of cache line sized datastructures to allocate for the cache coherency
  threads to operate.
  
+.TP
+.B \-\-cc_line_size <number>
+Size of cache line to use as the basis for cache coherency test data
+structures.
+
  .TP
  .B \-\-cc_test
  Do the cache coherency testing.
author	ewout <ewout@google.com>
	Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)
committer	ewout <ewout@google.com>
	Tue, 10 Sep 2013 21:27:49 +0000 (21:27 +0000)
configure.ac		patch \| blob \| history
src/Makefile.am		patch \| blob \| history
src/clock.h	[new file with mode: 0644]	patch \| blob
src/disk_blocks.cc		patch \| blob \| history
src/disk_blocks.h		patch \| blob \| history
src/findmask.c		patch \| blob \| history
src/logger.cc		patch \| blob \| history
src/logger.h		patch \| blob \| history
src/os.cc		patch \| blob \| history
src/os.h		patch \| blob \| history
src/sat.cc		patch \| blob \| history
src/sat.h		patch \| blob \| history
src/sattypes.h		patch \| blob \| history
src/worker.cc		patch \| blob \| history
src/worker.h		patch \| blob \| history
stressapptest.1		patch \| blob \| history