* Added a CPU Frequency test for select X86 processors to verify a minimum frequency is maintained during non-pause periods.
* Fixed the error accounting in WorkerThread::CheckRegion if more than 128 miscompares are found and when block errors are detected.
* Updated the logger to include timestamps and the associated timezone.
* Moved from apicid() to sched_getcpu() for determining the core ID.
* Added the ability to reserve a specified amount of memory. This can override the requested memory allocation.
* If not using POSIX shared memory or hugepages, explicitly mmap memory if the pagesize is 4kB otherwise use memalign.
* Removed the OSLayer's unused PCI device handling.
* Numerous refactoring changes.
if test "$with_static" == "yes"
then
- AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
- LIBS="$LIBS -static"
+ AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
+ LIBS="$LIBS -static"
else
- AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
+ AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
fi
AC_CANONICAL_HOST
HFILES += disk_blocks.h
HFILES += adler32memcpy.h
HFILES += logger.h
+HFILES += clock.h
stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES)
findmask_SOURCES = findmask.c findmask.inc
--- /dev/null
+// Copyright 2010 Google Inc. All Rights Reserved.
+// Author: cferris
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STRESSAPPTEST_CLOCK_H_ // NOLINT
+#define STRESSAPPTEST_CLOCK_H_
+
+#include <time.h>
+
+// This class implements a clock that can be overridden for unit tests.
+class Clock {
+ public:
+ virtual ~Clock() {}
+
+ virtual time_t Now() { return time(NULL); }
+};
+
+#endif // STRESSAPPTEST_CLOCK_H_ NOLINT
// Thread-safe container of disk blocks
-#include <utility>
-
// This file must work with autoconf on its public version,
// so these includes are correct.
#include "disk_blocks.h"
-DiskBlockTable::DiskBlockTable() {
- nelems_ = 0;
+#include <utility>
+
+// BlockData
+BlockData::BlockData() : address_(0), size_(0),
+ references_(0), initialized_(false),
+ pattern_(NULL) {
+ pthread_mutex_init(&data_mutex_, NULL);
+}
+
+BlockData::~BlockData() {
+ pthread_mutex_destroy(&data_mutex_);
+}
+
+void BlockData::set_initialized() {
+ pthread_mutex_lock(&data_mutex_);
+ initialized_ = true;
+ pthread_mutex_unlock(&data_mutex_);
+}
+
+bool BlockData::initialized() const {
+ pthread_mutex_lock(&data_mutex_);
+ bool initialized = initialized_;
+ pthread_mutex_unlock(&data_mutex_);
+ return initialized;
+}
+
+// DiskBlockTable
+DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0),
+ device_name_(""), device_sectors_(0),
+ segment_size_(0), size_(0) {
pthread_mutex_init(&data_mutex_, NULL);
pthread_mutex_init(&parameter_mutex_, NULL);
pthread_cond_init(&data_condition_, NULL);
}
DiskBlockTable::~DiskBlockTable() {
- CleanTable();
pthread_mutex_destroy(&data_mutex_);
pthread_mutex_destroy(&parameter_mutex_);
pthread_cond_destroy(&data_condition_);
}
-void DiskBlockTable::CleanTable() {
- pthread_mutex_lock(&data_mutex_);
- for (map<int64, StorageData*>::iterator it =
- addr_to_block_.begin(); it != addr_to_block_.end(); ++it) {
- delete it->second;
- }
- addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end());
- nelems_ = 0;
- pthread_cond_broadcast(&data_condition_);
- pthread_mutex_unlock(&data_mutex_);
-}
-
// 64-bit non-negative random number generator. Stolen from
// depot/google3/base/tracecontext_unittest.cc.
int64 DiskBlockTable::Random64() {
return -x;
}
-int64 DiskBlockTable::NumElems() {
- unsigned int nelems;
+uint64 DiskBlockTable::Size() {
pthread_mutex_lock(&data_mutex_);
- nelems = nelems_;
+ uint64 size = size_;
pthread_mutex_unlock(&data_mutex_);
- return nelems;
+ return size;
}
void DiskBlockTable::InsertOnStructure(BlockData *block) {
- int64 address = block->GetAddress();
+ int64 address = block->address();
StorageData *sd = new StorageData();
sd->block = block;
- sd->pos = nelems_;
+ sd->pos = size_;
// Creating new block ...
pthread_mutex_lock(&data_mutex_);
- if (pos_to_addr_.size() <= nelems_) {
+ if (pos_to_addr_.size() <= size_) {
pos_to_addr_.insert(pos_to_addr_.end(), address);
} else {
- pos_to_addr_[nelems_] = address;
+ pos_to_addr_[size_] = address;
}
- addr_to_block_.insert(std::make_pair(address, sd));
- nelems_++;
+ addr_to_block_[address] = sd;
+ size_++;
pthread_cond_broadcast(&data_condition_);
pthread_mutex_unlock(&data_mutex_);
}
int DiskBlockTable::RemoveBlock(BlockData *block) {
// For write threads, check the reference counter and remove
// it from the structure.
- int64 address = block->GetAddress();
+ int64 address = block->address();
AddrToBlockMap::iterator it = addr_to_block_.find(address);
int ret = 1;
if (it != addr_to_block_.end()) {
int curr_pos = it->second->pos;
- int last_pos = nelems_ - 1;
+ int last_pos = size_ - 1;
AddrToBlockMap::iterator last_it = addr_to_block_.find(
pos_to_addr_[last_pos]);
- sat_assert(nelems_ > 0);
+ sat_assert(size_ > 0);
sat_assert(last_it != addr_to_block_.end());
- // Everything is fine, updating ...
+ // Everything is fine, removing block from table.
pthread_mutex_lock(&data_mutex_);
pos_to_addr_[curr_pos] = pos_to_addr_[last_pos];
last_it->second->pos = curr_pos;
delete it->second;
addr_to_block_.erase(it);
- nelems_--;
+ size_--;
block->DecreaseReferenceCounter();
if (block->GetReferenceCounter() == 0)
delete block;
+ else if (block->GetReferenceCounter() < 0)
+ ret = 0;
pthread_cond_broadcast(&data_condition_);
pthread_mutex_unlock(&data_mutex_);
} else {
}
int DiskBlockTable::ReleaseBlock(BlockData *block) {
- // If is a random thread, just check the reference counter.
+ // If caller is a random thread, just check the reference counter.
int ret = 1;
pthread_mutex_lock(&data_mutex_);
int references = block->GetReferenceCounter();
- if (references > 0) {
- if (references == 1)
- delete block;
- else
- block->DecreaseReferenceCounter();
- } else {
+ if (references == 1)
+ delete block;
+ else if (references > 0)
+ block->DecreaseReferenceCounter();
+ else
ret = 0;
- }
pthread_mutex_unlock(&data_mutex_);
return ret;
}
BlockData *DiskBlockTable::GetRandomBlock() {
struct timespec ts;
struct timeval tp;
- int result = 0;
gettimeofday(&tp, NULL);
ts.tv_sec = tp.tv_sec;
ts.tv_nsec = tp.tv_usec * 1000;
ts.tv_sec += 2; // Wait for 2 seconds.
+ int result = 0;
pthread_mutex_lock(&data_mutex_);
- while (!nelems_ && result != ETIMEDOUT) {
+ while (!size_ && result != ETIMEDOUT) {
result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts);
}
if (result == ETIMEDOUT) {
return NULL;
} else {
int64 random_number = Random64();
- int64 random_pos = random_number % nelems_;
+ int64 random_pos = random_number % size_;
int64 address = pos_to_addr_[random_pos];
AddrToBlockMap::const_iterator it = addr_to_block_.find(address);
sat_assert(it != addr_to_block_.end());
BlockData *b = it->second->block;
// A block is returned only if its content is written on disk.
- if (b->BlockIsInitialized()) {
+ if (b->initialized()) {
b->IncreaseReferenceCounter();
} else {
b = NULL;
}
}
-void DiskBlockTable::SetParameters(
- int sector_size, int write_block_size, int64 device_sectors,
- int64 segment_size, string device_name) {
+void DiskBlockTable::SetParameters(int sector_size,
+ int write_block_size,
+ int64 device_sectors,
+ int64 segment_size,
+ const string& device_name) {
+ sat_assert(size_ == 0);
pthread_mutex_lock(&parameter_mutex_);
sector_size_ = sector_size;
write_block_size_ = write_block_size;
device_sectors_ = device_sectors;
segment_size_ = segment_size;
device_name_ = device_name;
- CleanTable();
pthread_mutex_unlock(&parameter_mutex_);
}
BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
int64 sector = 0;
BlockData *block = new BlockData();
-
bool good_sequence = false;
- int num_sectors;
-
if (block == NULL) {
logprintf(0, "Process Error: Unable to allocate memory "
"for sector data for disk %s.\n", device_name_.c_str());
return NULL;
}
-
pthread_mutex_lock(&parameter_mutex_);
-
sat_assert(device_sectors_ != 0);
-
// Align the first sector with the beginning of a write block
- num_sectors = write_block_size_ / sector_size_;
-
+ int num_sectors = write_block_size_ / sector_size_;
for (int i = 0; i < kBlockRetry && !good_sequence; i++) {
good_sequence = true;
-
// Use the entire disk or a small segment of the disk to allocate the first
// sector in the block from.
-
if (segment_size_ == -1) {
sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % (
device_sectors_ / num_sectors);
segment_size_ / num_sectors);
sector *= num_sectors;
sector += segment * segment_size_;
-
// Make sure the block is within the segment.
if (sector + num_sectors > (segment + 1) * segment_size_) {
good_sequence = false;
// now aligned to the write_block_size, it is not necessary
// to check each sector, just the first block (a sector
// overlap will never occur).
-
pthread_mutex_lock(&data_mutex_);
if (addr_to_block_.find(sector) != addr_to_block_.end()) {
good_sequence = false;
}
if (good_sequence) {
- block->SetParameters(sector, write_block_size_);
+ block->set_address(sector);
+ block->set_size(write_block_size_);
block->IncreaseReferenceCounter();
InsertOnStructure(block);
} else {
block = NULL;
}
pthread_mutex_unlock(&parameter_mutex_);
-
return block;
}
-
-// BlockData
-
-BlockData::BlockData() {
- addr_ = 0;
- size_ = 0;
- references_ = 0;
- initialized_ = false;
- pthread_mutex_init(&data_mutex_, NULL);
-}
-
-BlockData::~BlockData() {
- pthread_mutex_destroy(&data_mutex_);
-}
-
-void BlockData::SetParameters(int64 address, int64 size) {
- addr_ = address;
- size_ = size;
-}
-
-void BlockData::IncreaseReferenceCounter() {
- references_++;
-}
-
-void BlockData::DecreaseReferenceCounter() {
- references_--;
-}
-
-int BlockData::GetReferenceCounter() {
- return references_;
-}
-
-void BlockData::SetBlockAsInitialized() {
- pthread_mutex_lock(&data_mutex_);
- initialized_ = true;
- pthread_mutex_unlock(&data_mutex_);
-}
-
-bool BlockData::BlockIsInitialized() {
- pthread_mutex_lock(&data_mutex_);
- bool initialized = initialized_;
- pthread_mutex_unlock(&data_mutex_);
- return initialized;
-}
-
-int64 BlockData::GetAddress() {
- return addr_;
-}
-
-int64 BlockData::GetSize() {
- return size_;
-}
-
-Pattern *BlockData::GetPattern() {
- return pattern_;
-}
-
-void BlockData::SetPattern(Pattern *p) {
- pattern_ = p;
-}
#include <map>
#include <vector>
#include <string>
-// This file must work with autoconf on its public version,
-// so these includes are correct.
-#include "pattern.h"
+
+#include "sattypes.h"
+
+class Pattern;
// Data about a block written to disk so that it can be verified later.
+// Thread-unsafe; non-const methods must be called under a lock,
+// except for the initialized() accessor/mutator, which is thread-safe
+// (and is, in fact, the only method meant to be called by anything
+// other than the thread-safe DiskBlockTable).
class BlockData {
public:
BlockData();
~BlockData();
- void SetParameters(int64 address, int64 size);
- void IncreaseReferenceCounter();
- void DecreaseReferenceCounter();
- int GetReferenceCounter();
- void SetBlockAsInitialized();
- bool BlockIsInitialized();
- int64 GetAddress();
- int64 GetSize();
- void SetPattern(Pattern *p);
- Pattern *GetPattern();
- protected:
- int64 addr_; // address of first sector in block
- int64 size_; // size of block
- int references_; // reference counter
- bool initialized_; // flag indicating the block was written on disk
+
+ // These are reference counters used to control how many
+ // threads currently have a copy of this particular block.
+ void IncreaseReferenceCounter() { references_++; }
+ void DecreaseReferenceCounter() { references_--; }
+ int GetReferenceCounter() const { return references_; }
+
+ // Controls whether the block was written on disk or not.
+  // Once written, it cannot be un-written without destroying
+  // this object.
+ void set_initialized();
+ bool initialized() const;
+
+ // Accessor methods for some data related to blocks.
+ void set_address(uint64 address) { address_ = address; }
+ uint64 address() const { return address_; }
+ void set_size(uint64 size) { size_ = size; }
+ uint64 size() const { return size_; }
+ void set_pattern(Pattern *p) { pattern_ = p; }
+ Pattern *pattern() { return pattern_; }
+ private:
+ uint64 address_; // Address of first sector in block
+ uint64 size_; // Size of block
+ int references_; // Reference counter
+ bool initialized_; // Flag indicating the block was written on disk
Pattern *pattern_;
- pthread_mutex_t data_mutex_;
+ mutable pthread_mutex_t data_mutex_;
DISALLOW_COPY_AND_ASSIGN(BlockData);
};
-// Disk Block table - store data from blocks to be write / read by
-// a DiskThread
+// A thread-safe table used to store block data and control access
+// to these blocks, letting several threads read and write blocks on
+// disk.
class DiskBlockTable {
public:
DiskBlockTable();
virtual ~DiskBlockTable();
- // Get Number of elements stored on table
- int64 NumElems();
- // Clean all table data
- void CleanTable();
- // Get a random block from the list. Only returns if a element
- // is available (consider that other thread must have added them.
- BlockData *GetRandomBlock();
- // Set all initial parameters. Assumes all existent data is
+ // Returns number of elements stored on table.
+ uint64 Size();
+
+ // Sets all initial parameters. Assumes all existent data is
// invalid and, therefore, must be removed.
void SetParameters(int sector_size, int write_block_size,
int64 device_sectors,
int64 segment_size,
- string device_name);
- // Return a new block in a unused address.
+ const string& device_name);
+
+ // During the regular execution, there will be 2 types of threads:
+ // - Write thread: gets a large number of blocks using GetUnusedBlock,
+ // writes them on disk (if on destructive mode),
+  //                   reads block content ONCE from disk and then removes
+ // the block from queue with RemoveBlock. After a removal a
+ // block is not available for read threads, but it is
+ // only removed from memory if there is no reference for
+ // this block. Note that a write thread also counts as
+ // a reference.
+ // - Read threads: get one block at a time (if available) with
+ // GetRandomBlock, reads its content from disk,
+ // checking whether it is correct or not, and releases
+ // (Using ReleaseBlock) the block to be erased by the
+ // write threads. Since several read threads are allowed
+ // to read the same block, a reference counter is used to
+ // control when the block can be REALLY erased from
+ // memory, and all memory management is made by a
+ // DiskBlockTable instance.
+
+  // Returns a new block at an unused address. Does not
+ // grant ownership of the pointer to the caller
+ // (use RemoveBlock to delete the block from memory instead).
BlockData *GetUnusedBlock(int64 segment);
- // Remove block from structure (called by write threads)
+
+ // Removes block from structure (called by write threads). Returns
+ // 1 if successful, 0 otherwise.
int RemoveBlock(BlockData *block);
- // Release block to be erased (called by random threads)
- int ReleaseBlock(BlockData *block);
- protected:
+ // Gets a random block from the list. Only returns if an element
+ // is available (a write thread has got this block, written it on disk,
+ // and set this block as initialized). Does not grant ownership of the
+ // pointer to the caller (use RemoveBlock to delete the block from
+ // memory instead).
+ BlockData *GetRandomBlock();
- void InsertOnStructure(BlockData *block);
- // Generate a random 64-bit integer (virtual so it could be
- // override by the tests)
- virtual int64 Random64();
+ // Releases block to be erased (called by random threads). Returns
+ // 1 if successful, 0 otherwise.
+ int ReleaseBlock(BlockData *block);
+ protected:
struct StorageData {
BlockData *block;
int pos;
};
-
- static const int kBlockRetry = 100; // Number of retries to allocate
- // sectors.
-
typedef map<int64, StorageData*> AddrToBlockMap;
typedef vector<int64> PosToAddrVector;
+
+ // Inserts block in structure, used in tests and by other methods.
+ void InsertOnStructure(BlockData *block);
+
+ // Generates a random 64-bit integer.
+ // Virtual method so it can be overridden by the tests.
+ virtual int64 Random64();
+
+ // Accessor methods for testing.
+ const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; }
+ const AddrToBlockMap& addr_to_block() const { return addr_to_block_; }
+
+ int sector_size() const { return sector_size_; }
+ int write_block_size() const { return write_block_size_; }
+ const string& device_name() const { return device_name_; }
+ int64 device_sectors() const { return device_sectors_; }
+ int64 segment_size() const { return segment_size_; }
+
+ private:
+ // Number of retries to allocate sectors.
+ static const int kBlockRetry = 100;
+ // Actual tables.
PosToAddrVector pos_to_addr_;
AddrToBlockMap addr_to_block_;
- uint64 nelems_;
- int sector_size_; // Sector size, in bytes
- int write_block_size_; // Block size, in bytes
- string device_name_; // Device name
- int64 device_sectors_; // Number of sectors in device
- int64 segment_size_; // Segment size, in bytes
+
+ // Configuration parameters for block selection
+ int sector_size_; // Sector size, in bytes
+ int write_block_size_; // Block size, in bytes
+ string device_name_; // Device name
+ int64 device_sectors_; // Number of sectors in device
+ int64 segment_size_; // Segment size in bytes
+ uint64 size_; // Number of elements on table
pthread_mutex_t data_mutex_;
pthread_cond_t data_condition_;
pthread_mutex_t parameter_mutex_;
* current progress.
*/
+#include <inttypes.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
if (a < NOISE) b = a;
if (b < NOISE) {
- printf("Found mask with just %d deviations: 0x%llx\n", b, mask);
+ printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask);
fflush(stdout);
}
}
void signal_handler(int signum) {
- printf("Received signal... currently evaluating mask 0x%llx!\n", lastmask);
+ printf("Received signal... currently evaluating mask 0x%" PRIx64 "!\n",
+ lastmask);
fflush(stdout);
}
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
+#include <time.h>
#include <unistd.h>
#include <string>
return;
}
char buffer[4096];
- int length = vsnprintf(buffer, sizeof buffer, format, args);
- if (static_cast<size_t>(length) >= sizeof buffer) {
- length = sizeof buffer;
- buffer[sizeof buffer - 1] = '\n';
+ size_t length = 0;
+ if (log_timestamps_) {
+ time_t raw_time;
+ time(&raw_time);
+ struct tm time_struct;
+ localtime_r(&raw_time, &time_struct);
+ length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ",
+ &time_struct);
+ LOGGER_ASSERT(length); // Catch if the buffer is set too small.
+ }
+ length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args);
+ if (length >= sizeof(buffer)) {
+ length = sizeof(buffer);
+ buffer[sizeof(buffer) - 1] = '\n';
}
QueueLogLine(new string(buffer, length));
}
}
void Logger::StopThread() {
- LOGGER_ASSERT(thread_running_);
+ // Allow this to be called before the thread has started.
+ if (!thread_running_) {
+ return;
+ }
thread_running_ = false;
- LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_));
+ int retval = pthread_mutex_lock(&queued_lines_mutex_);
+ LOGGER_ASSERT(0 == retval);
bool need_cond_signal = queued_lines_.empty();
queued_lines_.push_back(NULL);
- LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
+ retval = pthread_mutex_unlock(&queued_lines_mutex_);
+ LOGGER_ASSERT(0 == retval);
if (need_cond_signal) {
- LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_));
+ retval = pthread_cond_signal(&queued_lines_cond_);
+ LOGGER_ASSERT(0 == retval);
}
- LOGGER_ASSERT(0 == pthread_join(thread_, NULL));
+ retval = pthread_join(thread_, NULL);
+ LOGGER_ASSERT(0 == retval);
}
-Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) {
+Logger::Logger()
+ : verbosity_(20),
+ log_fd_(-1),
+ thread_running_(false),
+ log_timestamps_(true) {
LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL));
LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL));
LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL));
LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
}
-namespace {
-void WriteToFile(const string& line, int fd) {
- LOGGER_ASSERT(write(fd, line.data(), line.size()) ==
- static_cast<ssize_t>(line.size()));
-}
-}
-
void Logger::WriteAndDeleteLogLine(string *line) {
LOGGER_ASSERT(line != NULL);
+ ssize_t bytes_written;
if (log_fd_ >= 0) {
- WriteToFile(*line, log_fd_);
+ bytes_written = write(log_fd_, line->data(), line->size());
+ LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
}
- WriteToFile(*line, 1);
+ bytes_written = write(STDOUT_FILENO, line->data(), line->size());
+ LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
delete line;
}
// Lines with a priority numerically greater than this will not be logged.
// May not be called while multiple threads are running.
- void SetVerbosity(int verbosity) {
+ virtual void SetVerbosity(int verbosity) {
verbosity_ = verbosity;
}
// Args:
// log_fd: The file descriptor to write to. Will not be closed by this
// object.
- void SetLogFd(int log_fd) {
+ virtual void SetLogFd(int log_fd) {
LOGGER_ASSERT(log_fd >= 0);
log_fd_ = log_fd;
}
// Set output to be written to stdout only. This is the default mode. May
// not be called while multiple threads are running.
- void SetStdoutOnly() {
+ virtual void SetStdoutOnly() {
log_fd_ = -1;
}
+ // Enable or disable logging of timestamps.
+ void SetTimestampLogging(bool log_ts_enabled) {
+ log_timestamps_ = log_ts_enabled;
+ }
+
// Logs a line, with a vprintf(3)-like interface. This will block on writing
// the line to stdout/disk iff the dedicated logging thread is not running.
// This will block on adding the line to the queue if doing so would exceed
// before this returns. Waits for the thread to finish before returning.
void StopThread();
- private:
+ protected:
Logger();
- ~Logger();
+ virtual ~Logger();
+ private:
// Args:
// line: Must be non-NULL. This function takes ownership of it.
void QueueLogLine(string *line);
int verbosity_;
int log_fd_;
bool thread_running_;
+ bool log_timestamps_;
vector<string*> queued_lines_;
// This doubles as a mutex for log_fd_ when the logging thread is not running.
pthread_mutex_t queued_lines_mutex_;
// so these includes are correct.
#include "sattypes.h"
#include "error_diag.h"
+#include "clock.h"
// OsLayer initialization.
OsLayer::OsLayer() {
testmemsize_ = 0;
totalmemsize_ = 0;
min_hugepages_bytes_ = 0;
+ reserve_mb_ = 0;
normal_mem_ = true;
use_hugepages_ = false;
use_posix_shm_ = false;
dynamic_mapped_shmem_ = false;
+ mmapped_allocation_ = false;
shmid_ = 0;
time_initialized_ = 0;
has_sse2_ = false;
use_flush_page_cache_ = false;
+
+ clock_ = NULL;
}
// OsLayer cleanup.
OsLayer::~OsLayer() {
if (error_diagnoser_)
delete error_diagnoser_;
+ if (clock_)
+ delete clock_;
}
// OsLayer initialization.
bool OsLayer::Initialize() {
- time_initialized_ = time(NULL);
+ if (!clock_) {
+ clock_ = new Clock();
+ }
+
+ time_initialized_ = clock_->Now();
// Detect asm support.
GetFeatures();
// Translates user virtual to physical address.
uint64 OsLayer::VirtualToPhysical(void *vaddr) {
uint64 frame, shift;
- off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8;
+ off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
int fd = open(kPagemapPath, O_RDONLY);
// /proc/self/pagemap is available in kernel >= 2.6.25
if (fd < 0)
// Get HW core features from cpuid instruction.
void OsLayer::GetFeatures() {
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
- // CPUID features documented at:
- // http://www.sandpile.org/ia32/cpuid.htm
- int ax, bx, cx, dx;
- __asm__ __volatile__ (
-# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__)
- "xchg %%ebx, %%esi;"
- "cpuid;"
- "xchg %%esi, %%ebx;"
- : "=S" (bx),
-# else
- "cpuid;"
- : "=b" (bx),
-# endif
- "=a" (ax), "=c" (cx), "=d" (dx) : "a" (1));
- has_clflush_ = (dx >> 19) & 1;
- has_sse2_ = (dx >> 26) & 1;
+ unsigned int eax = 1, ebx, ecx, edx;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ has_clflush_ = (edx >> 19) & 1;
+ has_sse2_ = (edx >> 26) & 1;
logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
has_clflush_ ? "true" : "false",
void OsLayer::Flush(void *vaddr) {
// Use the generic flush. This function is just so we can override
// this if we are so inclined.
- if (has_clflush_)
- FastFlush(vaddr);
+ if (has_clflush_) {
+ OsLayer::FastFlush(vaddr);
+ }
}
// all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
// blocks with bits distributed from each chip in that channel.
int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
- static const string unknown = "DIMM Unknown";
if (!channels_) {
- snprintf(buf, len, "%s", unknown.c_str());
- return 0;
+ snprintf(buf, len, "DIMM Unknown");
+ return -1;
}
// Find channel by XORing address bits in channel_hash mask.
- uint32 low = (uint32)(addr & channel_hash_);
- uint32 high = (uint32)((addr & channel_hash_) >> 32);
+ uint32 low = static_cast<uint32>(addr & channel_hash_);
+ uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
vector<string>& channel = (*channels_)[
__builtin_parity(high) ^ __builtin_parity(low)];
// Report an error in an easily parseable way.
bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
- time_t now = time(NULL);
+ time_t now = clock_->Now();
int ttf = now - time_initialized_;
- logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
+ if (strlen(symptom) && strlen(part)) {
+ logprintf(0, "Report Error: %s : %s : %d : %ds\n",
+ symptom, part, count, ttf);
+ } else {
+ // Log something so the error still shows up, but this won't break the
+ // parser.
+ logprintf(0, "Warning: Invalid Report Error: "
+ "%s : %s : %d : %ds\n", symptom, part, count, ttf);
+ }
return true;
}
//
// TODO(nsanders): is there a more correct way to determine target
// memory size?
- if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
- minsize = min_hugepages_bytes_;
- } else if (physsize < 2048LL * kMegabyte) {
- minsize = ((pages * 85) / 100) * pagesize;
+ if (hugepagesize > 0) {
+ if (min_hugepages_bytes_ > 0) {
+ minsize = min_hugepages_bytes_;
+ } else {
+ minsize = hugepagesize;
+ }
} else {
- minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+ if (physsize < 2048LL * kMegabyte) {
+ minsize = ((pages * 85) / 100) * pagesize;
+ } else {
+ minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+ }
+ // Make sure that at least reserve_mb_ is left for the system.
+ if (reserve_mb_ > 0) {
+ int64 totalsize = pages * pagesize;
+ int64 reserve_kb = reserve_mb_ * kMegabyte;
+ if (reserve_kb > totalsize) {
+ logprintf(0, "Procedural Error: %lld is bigger than the total memory "
+ "available %lld\n", reserve_kb, totalsize);
+ } else if (reserve_kb > totalsize - minsize) {
+ logprintf(5, "Warning: Overriding memory to use: original %lld, "
+ "current %lld\n", minsize, totalsize - reserve_kb);
+ minsize = totalsize - reserve_kb;
+ }
+ }
}
// Use hugepage sizing if available.
"'sudo mount -o remount,size=100\% /dev/shm.'\n");
} else if (hugepagesize >= length) {
prefer_hugepages = true;
- logprintf(3, "Log: Prefer using hugepace allocation.\n");
+ logprintf(3, "Log: Prefer using hugepage allocation.\n");
} else {
logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
}
break;
}
- shmaddr = shmat(shmid, NULL, NULL);
+ shmaddr = shmat(shmid, NULL, 0);
if (shmaddr == reinterpret_cast<void*>(-1)) {
int err = errno;
string errtxt = ErrorString(err);
// Do a full mapping here otherwise.
shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
- shm_object, NULL);
+ shm_object, 0);
if (shmaddr == reinterpret_cast<void*>(-1)) {
int err = errno;
string errtxt = ErrorString(err);
} while (0);
shm_unlink("/stressapptest");
}
-#endif // HAVE_SYS_SHM_H
+#endif // HAVE_SYS_SHM_H
if (!use_hugepages_ && !use_posix_shm_) {
- // Use memalign to ensure that blocks are aligned enough for disk direct IO.
- buf = static_cast<char*>(memalign(4096, length));
- if (buf) {
- logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
- } else {
- logprintf(0, "Process Error: memalign returned 0\n");
- if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
- logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
- "bit process. Please setup shared memory.\n");
+    // If the page size is what SAT is expecting, explicitly perform mmap()
+ // allocation.
+ if (sysconf(_SC_PAGESIZE) >= 4096) {
+ void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (map_buf != MAP_FAILED) {
+ buf = map_buf;
+ mmapped_allocation_ = true;
+ logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
+ }
+ }
+ if (!mmapped_allocation_) {
+ // Use memalign to ensure that blocks are aligned enough for disk direct
+ // IO.
+ buf = static_cast<char*>(memalign(4096, length));
+ if (buf) {
+ logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
+ } else {
+ logprintf(0, "Process Error: memalign returned 0\n");
+ if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
+ logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
+ "bit process. Please setup shared memory.\n");
+ }
}
}
}
munmap(testmem_, testmemsize_);
}
close(shmid_);
+ } else if (mmapped_allocation_) {
+ munmap(testmem_, testmemsize_);
} else {
free(testmem_);
}
bool OsLayer::CpuStressWorkload() {
double float_arr[100];
double sum = 0;
+#ifdef HAVE_RAND_R
unsigned int seed = 12345;
+#endif
// Initialize array with random numbers.
for (int i = 0; i < 100; i++) {
if (rand_r(&seed) % 2)
float_arr[i] *= -1.0;
#else
- float_arr[i] = rand();
- if (rand() % 2)
+ srand(time(NULL));
+ float_arr[i] = rand(); // NOLINT
+ if (rand() % 2) // NOLINT
float_arr[i] *= -1.0;
#endif
}
logprintf(12, "Log: I'm Feeling Lucky!\n");
return true;
}
-
-PCIDevices OsLayer::GetPCIDevices() {
- PCIDevices device_list;
- DIR *dir;
- struct dirent *buf = new struct dirent();
- struct dirent *entry;
- dir = opendir(kSysfsPath);
- if (!dir)
- logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
- while (readdir_r(dir, buf, &entry) == 0 && entry) {
- PCIDevice *device;
- unsigned int dev, func;
- // ".", ".." or a special non-device perhaps.
- if (entry->d_name[0] == '.')
- continue;
-
- device = new PCIDevice();
- if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
- &device->domain, &device->bus, &dev, &func) < 4) {
- logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
- free(device);
- continue;
- }
- device->dev = dev;
- device->func = func;
- device->vendor_id = PCIGetValue(entry->d_name, "vendor");
- device->device_id = PCIGetValue(entry->d_name, "device");
- PCIGetResources(entry->d_name, device);
- device_list.insert(device_list.end(), device);
- }
- closedir(dir);
- delete buf;
- return device_list;
-}
-
-int OsLayer::PCIGetValue(string name, string object) {
- int fd, len;
- char filename[256];
- char buf[256];
- snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
- name.c_str(), object.c_str());
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return 0;
- len = read(fd, buf, 256);
- close(fd);
- buf[len] = '\0';
- return strtol(buf, NULL, 0); // NOLINT
-}
-
-int OsLayer::PCIGetResources(string name, PCIDevice *device) {
- char filename[256];
- char buf[256];
- FILE *file;
- int64 start;
- int64 end;
- int64 size;
- int i;
- snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
- name.c_str(), "resource");
- file = fopen(filename, "r");
- if (!file) {
- logprintf(0, "Process Error: impossible to find resource file for %s",
- filename);
- return errno;
- }
- for (i = 0; i < 6; i++) {
- if (!fgets(buf, 256, file))
- break;
- sscanf(buf, "%llx %llx", &start, &end); // NOLINT
- size = 0;
- if (start)
- size = end - start + 1;
- device->base_addr[i] = start;
- device->size[i] = size;
- }
- fclose(file);
- return 0;
-}
#define STRESSAPPTEST_OS_H_
#include <dirent.h>
+#include <sys/syscall.h>
+
#include <string>
#include <list>
#include <map>
// so these includes are correct.
#include "adler32memcpy.h" // NOLINT
#include "sattypes.h" // NOLINT
+#include "clock.h" // NOLINT
const char kPagemapPath[] = "/proc/self/pagemap";
-const char kSysfsPath[] = "/sys/bus/pci/devices";
struct PCIDevice {
int32 domain;
class ErrorDiag;
+class Clock;
+
// This class implements OS/Platform specific funtions.
class OsLayer {
public:
min_hugepages_bytes_ = min_bytes;
}
+  // Set the minimum amount of memory that should not be allocated. This only
+  // has any effect if hugepages are not used.
+ // Must be set before Initialize().
+ void SetReserveSize(int64 reserve_mb) {
+ reserve_mb_ = reserve_mb;
+ }
+
// Set parameters needed to translate physical address to memory module.
void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
vector< vector<string> > *channels) {
// Prints failed dimm. This implementation is optional for
// subclasses to implement.
// Takes a bus address and string, and prints the DIMM name
- // into the string. Returns error status.
+ // into the string. Returns the DIMM number that corresponds to the
+ // address given, or -1 if unable to identify the DIMM number.
+ // Note that subclass implementations of FindDimm() MUST fill
+ // buf with at LEAST one non-whitespace character (provided len > 0).
virtual int FindDimm(uint64 addr, char *buf, int len);
- // Print dimm info, plus more available info.
- virtual int FindDimmExtended(uint64 addr, char *buf, int len) {
- return FindDimm(addr, buf, len);
- }
-
// Classifies addresses according to "regions"
// This may mean different things on different platforms.
// instruction. For example, software can use an MFENCE instruction to
// insure that previous stores are included in the write-back.
asm volatile("mfence");
- asm volatile("clflush (%0)" :: "r" (vaddr));
+ asm volatile("clflush (%0)" : : "r" (vaddr));
+ asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ #warning "Unsupported CPU type ARMV7A: Using syscall to cache flush."
+ // ARMv7a cachelines are 8 words (32 bytes).
+ syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Takes a NULL-terminated
+ // array of addresses to flush.
+ inline static void FastFlushList(void **vaddrs) {
+#ifdef STRESSAPPTEST_CPU_PPC
+ while (*vaddrs) {
+ asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
+ }
+ asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // Put mfence before and after clflush to make sure:
+ // 1. The write before the clflush is committed to memory bus;
+ // 2. The read after the clflush is hitting the memory bus.
+ //
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
+ asm volatile("mfence");
+ while (*vaddrs) {
+ asm volatile("clflush (%0)" : : "r" (*vaddrs++));
+ }
+ asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ while (*vaddrs) {
+ FastFlush(*vaddrs++);
+ }
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush hint, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Note that this
+ // will not guarantee that a flush happens, but will at least
+ // hint that it should. This is useful for speeding up
+ // parallel march algorithms.
+ inline static void FastFlushHint(void *vaddr) {
+#ifdef STRESSAPPTEST_CPU_PPC
+ asm volatile("dcbf 0,%0" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
+ asm volatile("clflush (%0)" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ FastFlush(vaddr);
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Sync's any
+ // transactions for ordering FastFlushHints.
+ inline static void FastFlushSync() {
+#ifdef STRESSAPPTEST_CPU_PPC
+ asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // Put mfence before and after clflush to make sure:
+ // 1. The write before the clflush is committed to memory bus;
+ // 2. The read after the clflush is hitting the memory bus.
+ //
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
asm volatile("mfence");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
- #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes."
+ // This is a NOP, FastFlushHint() always does a full flush, so there's
+ // nothing to do for FastFlushSync().
#else
#warning "Unsupported CPU type: Unable to force cache flushes."
#endif
// Handle to platform-specific error diagnoser.
ErrorDiag *error_diagnoser_;
- // Detect all PCI Devices.
- virtual PCIDevices GetPCIDevices();
-
// Disambiguate between different "warm" memcopies.
virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
unsigned int size_in_bytes,
}
ErrCallback get_err_log_callback() { return err_log_callback_; }
+ // Set a clock object that can be overridden for use with unit tests.
+ void SetClock(Clock *clock) {
+ if (clock_) {
+ delete clock_;
+ }
+ clock_ = clock;
+ time_initialized_ = clock_->Now();
+ }
+
protected:
void *testmem_; // Location of test memory.
uint64 testmemsize_; // Size of test memory.
int64 totalmemsize_; // Size of available memory.
int64 min_hugepages_bytes_; // Minimum hugepages size.
+ int64 reserve_mb_; // Minimum amount of memory to reserve in MB.
bool error_injection_; // Do error injection?
bool normal_mem_; // Memory DMA capable?
bool use_hugepages_; // Use hugepage shmem?
bool use_posix_shm_; // Use 4k page shmem?
bool dynamic_mapped_shmem_; // Conserve virtual address space.
+ bool mmapped_allocation_; // Was memory allocated using mmap()?
int shmid_; // Handle to shmem
vector< vector<string> > *channels_; // Memory module names per channel.
uint64 channel_hash_; // Mask of address bits XORed for channel.
// Get file descriptor for dev msr.
virtual int OpenMSR(uint32 core, uint32 address);
- // Auxiliary methods for PCI device configuration
- int PCIGetValue(string name, string object);
- int PCIGetResources(string name, PCIDevice *device);
// Look up how many hugepages there are.
virtual int64 FindHugePages();
// Link to find last transaction at an error location.
ErrCallback err_log_callback_;
+ // Object to wrap the time function.
+ Clock *clock_;
+
private:
DISALLOW_COPY_AND_ASSIGN(OsLayer);
};
#error Build system regression - COPTS disregarded.
#endif
+ // Check if the cpu frequency test is enabled and able to run.
+ if (cpu_freq_test_) {
+ if (!CpuFreqThread::CanRun()) {
+ logprintf(0, "Process Error: This platform does not support this "
+ "test.\n");
+ bad_status();
+ return false;
+ } else if (cpu_freq_threshold_ <= 0) {
+ logprintf(0, "Process Error: The cpu frequency test requires "
+ "--cpu_freq_threshold set to a value > 0\n");
+ bad_status();
+ return false;
+ } else if (cpu_freq_round_ < 0) {
+ logprintf(0, "Process Error: The --cpu_freq_round option must be greater"
+ " than or equal to zero. A value of zero means no rounding.\n");
+ bad_status();
+ return false;
+ }
+ }
+
// Use all CPUs if nothing is specified.
if (memory_threads_ == -1) {
memory_threads_ = os_->num_cpus();
if (GetValid(&pe, kInvalidTag)) {
int64 paddr = os_->VirtualToPhysical(pe.addr);
int32 region = os_->FindRegion(paddr);
-
- if (i < 256) {
- char buf[256];
- os_->FindDimm(paddr, buf, sizeof(buf));
- logprintf(12, "Log: address: %#llx, %s\n", paddr, buf);
- }
region_[region]++;
pe.paddr = paddr;
pe.tag = 1 << region;
// Initializes sync'd log file to ensure output is saved.
if (!InitializeLogfile())
return false;
+ Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_);
Logger::GlobalLogger()->StartThread();
logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str());
if (min_hugepages_mbytes_ > 0)
os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte);
+
+ if (reserve_mb_ > 0)
+ os_->SetReserveSize(reserve_mb_);
+
if (channels_.size() > 0) {
logprintf(6, "Log: Decoding memory: %dx%d bit channels,"
"%d modules per channel (x%d), decoding hash 0x%x\n",
pages_ = 0;
size_mb_ = 0;
size_ = size_mb_ * kMegabyte;
+ reserve_mb_ = 0;
min_hugepages_mbytes_ = 0;
freepages_ = 0;
paddr_base_ = 0;
run_on_anything_ = 0;
use_logfile_ = 0;
logfile_ = 0;
+ log_timestamps_ = true;
// Detect 32/64 bit binary.
void *pvoid = 0;
address_mode_ = sizeof(pvoid) * 8;
// Cache coherency data initialization.
cc_test_ = false; // Flag to trigger cc threads.
cc_cacheline_count_ = 2; // Two datastructures of cache line size.
+ cc_cacheline_size_ = 0; // Size of a cacheline (0 for auto-detect).
cc_inc_count_ = 1000; // Number of times to increment the shared variable.
cc_cacheline_data_ = 0; // Cache Line size datastructure.
+ // Cpu frequency data initialization.
+ cpu_freq_test_ = false; // Flag to trigger cpu frequency thread.
+ cpu_freq_threshold_ = 0; // Threshold, in MHz, at which a cpu fails.
+ cpu_freq_round_ = 10; // Round the computed frequency to this value.
+
sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL));
file_threads_ = 0;
net_threads_ = 0;
// Set number of megabyte to use.
ARG_IVALUE("-M", size_mb_);
+ // Specify the amount of megabytes to be reserved for system.
+ ARG_IVALUE("--reserve_memory", reserve_mb_);
+
// Set minimum megabytes of hugepages to require.
ARG_IVALUE("-H", min_hugepages_mbytes_);
// Set number of cache line size datastructures
ARG_IVALUE("--cc_line_count", cc_cacheline_count_);
+ // Override the detected or assumed cache line size.
+ ARG_IVALUE("--cc_line_size", cc_cacheline_size_);
+
// Flag set when cache coherency tests need to be run
- ARG_KVALUE("--cc_test", cc_test_, 1);
+ ARG_KVALUE("--cc_test", cc_test_, true);
+
+ // Set when the cpu_frequency test needs to be run
+ ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true);
+
+ // Set the threshold in MHz at which the cpu frequency test will fail.
+ ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_);
+
+ // Set the rounding value for the cpu frequency test. The default is to
+ // round to the nearest 10s value.
+ ARG_IVALUE("--cpu_freq_round", cpu_freq_round_);
// Set number of CPU stress threads.
ARG_IVALUE("-C", cpu_stress_threads_);
// Verbosity level.
ARG_IVALUE("-v", verbosity_);
+ // Turn off timestamps logging.
+ ARG_KVALUE("--no_timestamps", log_timestamps_, false);
+
// Set maximum number of errors to collect. Stop running after this many.
ARG_IVALUE("--max_errors", max_errorcount_);
for (uint i = 0; i < channels_.size(); i++)
if (channels_[i].size() != channels_[0].size()) {
logprintf(6, "Process Error: "
- "Channels 0 and %d have a different count of dram modules.\n",i);
+ "Channels 0 and %d have a different count of dram modules.\n", i);
bad_status();
return false;
}
void Sat::PrintHelp() {
printf("Usage: ./sat(32|64) [options]\n"
" -M mbytes megabytes of ram to test\n"
+         " --reserve_memory     If not using hugepages, the amount of memory to "
+         "reserve for the system\n"
" -H mbytes minimum megabytes of hugepages to require\n"
" -s seconds number of seconds to run\n"
" -m threads number of memory copy threads to run\n"
" -f filename add a disk thread with "
"tempfile 'filename'\n"
" -l logfile log output to file 'logfile'\n"
+ " --no_timestamps do not prefix timestamps to log messages\n"
" --max_errors n exit early after finding 'n' errors\n"
" -v level verbosity (0-20), default is 8\n"
" -W Use more CPU-stressful memory copy\n"
"cacheline's member\n"
" --cc_line_count number of cache line sized datastructures "
"to allocate for the cache coherency threads to operate\n"
+ " --cc_line_size override the auto-detected cache line size\n"
+ " --cpu_freq_test enable the cpu frequency test (requires the "
+ "--cpu_freq_threshold argument to be set)\n"
+ " --cpu_freq_threshold fail the cpu frequency test if the frequency "
+ "goes below this value (specified in MHz)\n"
+ " --cpu_freq_round round the computed frequency to this value, if set"
+ " to zero, only round to the nearest MHz\n"
" --paddr_base allocate memory starting from this address\n"
" --pause_delay delay (in seconds) between power spikes\n"
" --pause_duration duration (in seconds) of each pause\n"
"each CPU to be tested by that CPU\n"
" --remote_numa choose memory regions not associated with "
"each CPU to be tested by that CPU\n"
- " --channel_hash mask of address bits XORed to determine channel.\n"
- " Mask 0x40 interleaves cachelines between channels\n"
+ " --channel_hash mask of address bits XORed to determine channel. "
+ "Mask 0x40 interleaves cachelines between channels\n"
" --channel_width bits width in bits of each memory channel\n"
- " --memory_channel u1,u2 defines a comma-separated list of names\n"
- " for dram packages in a memory channel.\n"
- " Use multiple times to define multiple channels.\n");
+ " --memory_channel u1,u2 defines a comma-separated list of names "
+ "for dram packages in a memory channel. Use multiple times to "
+ "define multiple channels.\n");
}
bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) {
sizeof(cc_cacheline_data) * cc_cacheline_count_);
int num_cpus = CpuCount();
+ char *num;
+ // Calculate the number of cache lines needed just to give each core
+ // its own counter.
+ int line_size = cc_cacheline_size_;
+ if (line_size <= 0) {
+ line_size = CacheLineSize();
+ if (line_size < kCacheLineSize)
+ line_size = kCacheLineSize;
+ logprintf(12, "Log: Using %d as cache line size\n", line_size);
+ }
+ // The number of cache lines needed to hold an array of num_cpus.
+ // "num" must be the same type as cc_cacheline_data[X].num or the memory
+ // size calculations will fail.
+ int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size;
// Allocate all the nums once so that we get a single chunk
// of contiguous memory.
- int *num;
#ifdef HAVE_POSIX_MEMALIGN
int err_result = posix_memalign(
reinterpret_cast<void**>(&num),
- kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_);
+ line_size, line_size * needed_lines * cc_cacheline_count_);
#else
- num = reinterpret_cast<int*>(memalign(kCacheLineSize,
- sizeof(*num) * num_cpus * cc_cacheline_count_));
+ num = reinterpret_cast<int*>(memalign(
+ line_size, line_size * needed_lines * cc_cacheline_count_));
int err_result = (num == 0);
#endif
sat_assert(err_result == 0);
int cline;
for (cline = 0; cline < cc_cacheline_count_; cline++) {
- memset(num, 0, sizeof(num_cpus) * num_cpus);
+ memset(num, 0, sizeof(*num) * num_cpus);
cc_cacheline_data_[cline].num = num;
- num += num_cpus;
+ num += (line_size * needed_lines) / sizeof(*num);
}
int tnum;
for (tnum = 0; tnum < num_cpus; tnum++) {
CpuCacheCoherencyThread *thread =
new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_,
- tnum, cc_inc_count_);
+ tnum, num_cpus, cc_inc_count_);
thread->InitThread(total_threads_++, this, os_, patternlist_,
&continuous_status_);
// Pin the thread to a particular core.
}
workers_map_.insert(make_pair(kCCType, cc_vector));
}
+
+ if (cpu_freq_test_) {
+ // Create the frequency test thread.
+ logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n",
+ cpu_freq_threshold_);
+ CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_,
+ cpu_freq_round_);
+ // This thread should be paused when other threads are paused.
+ thread->InitThread(total_threads_++, this, os_, NULL,
+ &power_spike_status_);
+
+ WorkerVector *cpu_freq_vector = new WorkerVector();
+ cpu_freq_vector->insert(cpu_freq_vector->end(), thread);
+ workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector));
+ }
+
ReleaseWorkerLock();
}
return sysconf(_SC_NPROCESSORS_CONF);
}
+// Return the worst case (largest) cache line size of the various levels of
+// cache actually present in the machine.
+int Sat::CacheLineSize() {
+ int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+ int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
+ if (linesize > max_linesize) max_linesize = linesize;
+ linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
+ if (linesize > max_linesize) max_linesize = linesize;
+ linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE);
+ if (linesize > max_linesize) max_linesize = linesize;
+ return max_linesize;
+}
+
// Notify and reap worker threads.
void Sat::JoinThreads() {
logprintf(12, "Log: Joining worker threads\n");
Logger::GlobalLogger()->VLogF(priority, format, args);
va_end(args);
}
+
+// Stop the logging thread and verify any pending data is written to the log.
+void logstop() {
+ Logger::GlobalLogger()->StopThread();
+}
+
// Return the number of cpus in the system.
int CpuCount();
+ // Return the worst-case (largest) cache line size of the system.
+ int CacheLineSize();
// Collect error counts from threads.
int64 GetTotalErrorCount();
int64 pages_; // Number of memory blocks.
int64 size_; // Size of memory tested, in bytes.
int64 size_mb_; // Size of memory tested, in MB.
+ int64 reserve_mb_; // Reserve at least this amount of memory
+ // for the system, in MB.
int64 min_hugepages_mbytes_; // Minimum hugepages size.
int64 freepages_; // How many invalid pages we need.
int disk_pages_; // Number of pages per temp file.
uint64 paddr_base_; // Physical address base.
- vector< vector<string> > channels_; // Memory module names per channel.
uint64 channel_hash_; // Mask of address bits XORed for channel.
int channel_width_; // Channel width in bits.
+ vector< vector<string> > channels_; // Memory module names per channel.
// Control flags.
volatile sig_atomic_t user_break_; // User has signalled early exit. Used as
int use_logfile_; // Log to a file.
char logfilename_[255]; // Name of file to log to.
int logfile_; // File handle to log to.
+ bool log_timestamps_; // Whether to add timestamps to log lines.
// Disk thread options.
int read_block_size_; // Size of block to read from disk.
bool cc_test_; // Flag to decide whether to start the
// cache coherency threads.
int cc_cacheline_count_; // Number of cache line size structures.
+ int cc_cacheline_size_; // Size of a cache line.
int cc_inc_count_; // Number of times to increment the shared
// cache lines structure members.
+ // Cpu Frequency Options.
+ bool cpu_freq_test_; // Flag to decide whether to start the
+ // cpu frequency thread.
+ int cpu_freq_threshold_; // The MHz threshold which will cause
+ // the test to fail.
+ int cpu_freq_round_; // Round the computed frequency to this
+ // value.
+
// Thread control.
int file_threads_; // Threads of file IO.
int net_threads_; // Threads of network IO.
kRandomDiskType = 7,
kCPUType = 8,
kErrorType = 9,
- kCCType = 10
+ kCCType = 10,
+ kCPUFreqType = 11,
};
// Helper functions.
#ifdef HAVE_CONFIG_H // Built using autoconf
#ifdef __ANDROID__
-#include "stressapptest_config_android.h"
+#include "stressapptest_config_android.h" // NOLINT
#else
-#include "stressapptest_config.h"
-using namespace __gnu_cxx;
-#endif
+#include "stressapptest_config.h" // NOLINT
+using namespace __gnu_cxx;  // NOLINT
+#endif // __ANDROID__
using namespace std;
typedef signed long long int64;
}
static const bool kOpenSource = true;
-#else
+#else // !HAVE_CONFIG_H
static const bool kOpenSource = false;
- #include "googlesattypes.h"
-#endif
+ #include "googlesattypes.h" // NOLINT
+#endif // HAVE_CONFIG_H
// Workaround to allow 32/64 bit conversion
// without running into strict aliasing problems.
union datacast_t {
// File sync'd print to console and log
void logprintf(int priority, const char *format, ...);
+// Stop the log and dump any queued lines.
+void logstop();
+
// We print to stderr ourselves first in case we're in such a bad state that the
// logger can't work.
#define sat_assert(x) \
{\
if (!(x)) {\
+ logstop();\
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
exit(1);\
#endif
}
+// Execute the cpuid instruction and pass back the contents of the registers.
+// This only works on x86 based platforms.
+inline void cpuid(
+ unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ // CPUID features documented at:
+ // http://www.sandpile.org/ia32/cpuid.htm
+#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64)
+#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+ // In PIC compilations using the i686 cpu type, ebx contains the address
+ // of the global offset table. The compiler can't properly handle constraints
+ // using the ebx register for this compile, so preserve the register
+ // ourselves.
+ asm(
+ "mov %%ebx, %%edi;"
+ "cpuid;"
+ "xchg %%edi, %%ebx;"
+ // Output registers.
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx)
+ // Input registers.
+ : "a" (*eax)
+ ); // Asm
+#else
+ asm(
+ "cpuid;"
+ // Output registers.
+ : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+ // Input registers.
+ : "a" (*eax)
+ ); // Asm
+#endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+#elif defined(STRESSAPPTEST_CPU_PPC)
+ return;
+#else
+#warning "Unsupported CPU type."
+#endif
+}
+
// Define handy constants here
static const int kTicksPerSec = 100;
static const int kMegabyte = (1024LL*1024LL);
#endif
namespace {
- // Get HW core ID from cpuid instruction.
- inline int apicid(void) {
- int cpu;
-#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
- __asm__ __volatile__ (
-# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__)
- "xchg %%ebx, %%esi;"
- "cpuid;"
- "xchg %%esi, %%ebx;"
- : "=S" (cpu)
-# else
- "cpuid;"
- : "=b" (cpu)
-# endif
- : "a" (1) : "cx", "dx");
-#elif defined(STRESSAPPTEST_CPU_ARMV7A)
- #warning "Unsupported CPU type ARMV7A: unable to determine core ID."
- cpu = 0;
-#else
- #warning "Unsupported CPU type: unable to determine core ID."
- cpu = 0;
-#endif
- return (cpu >> 24);
- }
-
// Work around the sad fact that there are two (gnu, xsi) incompatible
// versions of strerror_r floating around google. Awesome.
bool sat_strerror(int err, char *buf, int len) {
inline uint64 addr_to_tag(void *address) {
return reinterpret_cast<uint64>(address);
}
-}
+} // namespace
#if !defined(O_DIRECT)
// Sometimes this isn't available.
WaitOnPauseBarrier();
}
-bool WorkerStatus::ContinueRunning() {
+bool WorkerStatus::ContinueRunning(bool *paused) {
// This loop is an optimization. We use it to immediately re-check the status
// after resuming from a pause, instead of returning and waiting for the next
// call to this function.
+ if (paused) {
+ *paused = false;
+ }
for (;;) {
switch (GetStatus()) {
case RUN:
WaitOnPauseBarrier();
// Wait for ResumeWorkers() to be called.
WaitOnPauseBarrier();
+ // Indicate that a pause occurred.
+ if (paused) {
+ *paused = true;
+ }
break;
case STOP:
return false;
logprintf(11, "Log: Bind to %s failed.\n",
cpuset_format(&cpu_mask_).c_str());
- logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n",
- thread_num_, apicid(),
+ logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n",
+ thread_num_, sched_getcpu(),
CurrentCpusFormat().c_str(),
cpuset_format(&cpu_mask_).c_str());
#if 0
const char *message) {
char dimm_string[256] = "";
- int apic_id = apicid();
+ int core_id = sched_getcpu();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
"%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
"read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
message,
- apic_id,
+ core_id,
CurrentCpusFormat().c_str(),
error->vaddr,
error->paddr,
if ((state == kGoodAgain) || (state == kBad)) {
unsigned int blockerrors = badend - badstart + 1;
errormessage = "Block Error";
+ // It's okay for the 1st entry to be corrected multiple times,
+ // it will simply be reported twice. Once here and once below
+ // when processing the error queue.
ProcessError(&recorded[0], 0, errormessage.c_str());
logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
"%d bytes from offset 0x%x to 0x%x\n",
blockerrors * wordsize_,
offset + badstart * wordsize_,
offset + badend * wordsize_);
- errorcount_ += blockerrors;
- return blockerrors;
}
}
}
if (page_error) {
// For each word in the data region.
- int error_recount = 0;
for (int i = 0; i < length / wordsize_; i++) {
uint64 actual = memblock[i];
uint64 expected;
// If the value is incorrect, save an error record for later printing.
if (actual != expected) {
- if (error_recount < kErrorLimit) {
- // We already reported these.
- error_recount++;
- } else {
- // If we have overflowed the error queue, print the errors now.
- struct ErrorRecord er;
- er.actual = actual;
- er.expected = expected;
- er.vaddr = &memblock[i];
-
- // Do the error printout. This will take a long time and
- // likely change the machine state.
- ProcessError(&er, 12, errormessage.c_str());
- overflowerrors++;
- }
+ // If we have overflowed the error queue, print the errors now.
+ struct ErrorRecord er;
+ er.actual = actual;
+ er.expected = expected;
+ er.vaddr = &memblock[i];
+
+ // Do the error printout. This will take a long time and
+ // likely change the machine state.
+ ProcessError(&er, 12, errormessage.c_str());
+ overflowerrors++;
}
}
}
char tag_dimm_string[256] = "";
bool read_error = false;
- int apic_id = apicid();
+ int core_id = sched_getcpu();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
error->tagvaddr, error->tagpaddr,
tag_dimm_string,
read_error ? "read error" : "write error",
- apic_id,
+ core_id,
CurrentCpusFormat().c_str(),
error->vaddr,
error->paddr,
AdlerChecksum ignored_checksum;
os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);
- // Force cache flush.
- int length = size_in_bytes / sizeof(*dstmem64);
- for (int i = 0; i < length; i += sizeof(*dstmem64)) {
- os_->FastFlush(dstmem64 + i);
- os_->FastFlush(srcmem64 + i);
+ // Force cache flush of both the source and destination addresses.
+ // length - length of block to flush in cachelines.
+ // mem_increment - number of dstmem/srcmem values per cacheline.
+ int length = size_in_bytes / kCacheLineSize;
+ int mem_increment = kCacheLineSize / sizeof(*dstmem64);
+ OsLayer::FastFlushSync();
+ for (int i = 0; i < length; ++i) {
+ OsLayer::FastFlushHint(dstmem64 + (i * mem_increment));
+ OsLayer::FastFlushHint(srcmem64 + (i * mem_increment));
}
+ OsLayer::FastFlushSync();
+
// Check results.
AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
// Patch up address tags.
blocksize,
currentblock * blocksize, 0);
if (errorcount == 0) {
- int apic_id = apicid();
+ int core_id = sched_getcpu();
logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
"CRC mismatch %s != %s, "
"but no miscompares found on second pass.\n",
- apic_id, CurrentCpusFormat().c_str(),
+ core_id, CurrentCpusFormat().c_str(),
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
struct ErrorRecord er;
blocksize,
currentblock * blocksize, 0);
if (errorcount == 0) {
- int apic_id = apicid();
+ int core_id = sched_getcpu();
logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage "
"CRC mismatch %s != %s, "
"but no miscompares found on second pass.\n",
- apic_id, CurrentCpusFormat().c_str(),
+ core_id, CurrentCpusFormat().c_str(),
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
struct ErrorRecord er;
// Open the file for access.
bool FileThread::OpenFile(int *pfile) {
- bool no_O_DIRECT = false;
int flags = O_RDWR | O_CREAT | O_SYNC;
int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
- no_O_DIRECT = true;
- fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT
+ fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT
+ os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL
}
if (fd < 0) {
logprintf(0, "Process Error: Failed to create file %s!!\n",
pages_copied_ = 0;
return false;
}
- if (no_O_DIRECT)
- os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL
*pfile = fd;
return true;
}
if (!result)
return false;
}
- return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP.
+ return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP.
}
// Copy data from file into memory block.
CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
int cacheline_count,
int thread_num,
+ int thread_count,
int inc_count) {
cc_cacheline_data_ = data;
cc_cacheline_count_ = cacheline_count;
cc_thread_num_ = thread_num;
+ cc_thread_count_ = thread_count;
cc_inc_count_ = inc_count;
}
+// A very simple pseudorandom generator. Since the random number is based
+// on only a few simple logic operations, it can be done quickly in registers
+// and the compiler can inline it.
+uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) {
+ return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial);
+}
+
// Worked thread to test the cache coherency of the CPUs
// Return false on fatal sw error.
bool CpuCacheCoherencyThread::Work() {
uint64 time_start, time_end;
struct timeval tv;
+ // Use a slightly more robust random number for the initial
+ // value, so the random sequences from the simple generator will
+ // be more divergent.
+#ifdef HAVE_RAND_R
unsigned int seed = static_cast<unsigned int>(gettid());
+ uint64 r = static_cast<uint64>(rand_r(&seed));
+ r |= static_cast<uint64>(rand_r(&seed)) << 32;
+#else
+ srand(time(NULL));
+ uint64 r = static_cast<uint64>(rand()); // NOLINT
+ r |= static_cast<uint64>(rand()) << 32; // NOLINT
+#endif
+
gettimeofday(&tv, NULL); // Get the timestamp before increments.
time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
// Choose a datastructure in random and increment the appropriate
// member in that according to the offset (which is the same as the
// thread number.
-#ifdef HAVE_RAND_R
- int r = rand_r(&seed);
-#else
- int r = rand();
-#endif
- r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
+ r = SimpleRandom(r);
+ int cline_num = r % cc_cacheline_count_;
+ int offset;
+ // Reverse the order for odd numbered threads in odd numbered cache
+ // lines. This is designed for massively multi-core systems where the
+ // number of cores exceeds the bytes in a cache line, so "distant" cores
+  // get a chance to exercise cache coherency between them.
+ if (cline_num & cc_thread_num_ & 1)
+ offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+ else
+ offset = cc_thread_num_;
// Increment the member of the randomely selected structure.
- (cc_cacheline_data_[r].num[cc_thread_num_])++;
+ (cc_cacheline_data_[cline_num].num[offset])++;
}
total_inc += cc_inc_count_;
// in all the cache line structures for this particular thread.
int cc_global_num = 0;
for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
- cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
+ int offset;
+ // Perform the same offset calculation from above.
+ if (cline_num & cc_thread_num_ & 1)
+ offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+ else
+ offset = cc_thread_num_;
+ cc_global_num += cc_cacheline_data_[cline_num].num[offset];
// Reset the cachline member's value for the next run.
- cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
+ cc_cacheline_data_[cline_num].num[offset] = 0;
}
if (sat_->error_injection())
cc_global_num = -1;
- if (cc_global_num != cc_inc_count_) {
+ // Since the count is only stored in a byte, to squeeze more into a
+ // single cache line, only compare it as a byte. In the event that there
+ // is something detected, the chance that it would be missed by a single
+ // thread is 1 in 256. If it affects all cores, that makes the chance
+ // of it being missed terribly minute. It seems unlikely any failure
+ // case would be off by more than a small number.
+ if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) {
errorcount_++;
logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
cc_global_num, cc_inc_count_);
// Open a device, return false on failure.
bool DiskThread::OpenDevice(int *pfile) {
- bool no_O_DIRECT = false;
int flags = O_RDWR | O_SYNC | O_LARGEFILE;
int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
- no_O_DIRECT = true;
- fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
+ fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
+ os_->ActivateFlushPageCache();
}
if (fd < 0) {
logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
device_name_.c_str(), thread_num_);
return false;
}
- if (no_O_DIRECT)
- os_->ActivateFlushPageCache();
*pfile = fd;
return GetDiskSize(fd);
// Block is either initialized by writing, or in nondestructive case,
// initialized by being added into the datastructure for later reading.
- block->SetBlockAsInitialized();
+ block->initialized();
in_flight_sectors_.push(block);
}
- if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
+ if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
return false;
// Verify blocks on disk.
errorcount_++;
os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
- if (event.res < 0) {
- switch (event.res) {
+ int64 result = static_cast<int64>(event.res);
+ if (result < 0) {
+ switch (result) {
case -EIO:
logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
"sectors starting at %lld on disk %s (thread %d).\n",
}
return true;
-#else // !HAVE_LIBAIO_H
+#else // !HAVE_LIBAIO_H
return false;
#endif
}
// Write a block to disk.
// Return false if the block is not written.
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
- memset(block_buffer_, 0, block->GetSize());
+ memset(block_buffer_, 0, block->size());
// Fill block buffer with a pattern
struct page_entry pe;
// Even though a valid page could not be obatined, it is not an error
// since we can always fill in a pattern directly, albeit slower.
unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
- block->SetPattern(patternlist_->GetRandomPattern());
+ block->set_pattern(patternlist_->GetRandomPattern());
logprintf(11, "Log: Warning, using pattern fill fallback in "
"DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
device_name_.c_str(), thread_num_);
- for (int i = 0; i < block->GetSize()/wordsize_; i++) {
- memblock[i] = block->GetPattern()->pattern(i);
+ for (unsigned int i = 0; i < block->size()/wordsize_; i++) {
+ memblock[i] = block->pattern()->pattern(i);
}
} else {
- memcpy(block_buffer_, pe.addr, block->GetSize());
- block->SetPattern(pe.pattern);
+ memcpy(block_buffer_, pe.addr, block->size());
+ block->set_pattern(pe.pattern);
sat_->PutValid(&pe);
}
logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
" (thread %d).\n",
- block->GetSize()/kSectorSize, block->GetAddress(),
+ block->size()/kSectorSize, block->address(),
device_name_.c_str(), thread_num_);
int64 start_time = GetTime();
- if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
- block->GetAddress() * kSectorSize, write_timeout_)) {
+ if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(),
+ block->address() * kSectorSize, write_timeout_)) {
return false;
}
// Return true if the block was read, also increment errorcount
// if the block had data errors or performance problems.
bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
- int64 blocks = block->GetSize() / read_block_size_;
+ int64 blocks = block->size() / read_block_size_;
int64 bytes_read = 0;
int64 current_blocks;
int64 current_bytes;
- uint64 address = block->GetAddress();
+ uint64 address = block->address();
logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
"(thread %d).\n",
// In non-destructive mode, don't compare the block to the pattern since
// the block was never written to disk in the first place.
if (!non_destructive_) {
- if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
+ if (CheckRegion(block_buffer_, block->pattern(), current_bytes,
0, bytes_read)) {
os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
errorcount_ += 1;
// when using direct IO.
#ifdef HAVE_POSIX_MEMALIGN
int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
- sat_->page_length());
+ sat_->page_length());
#else
block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
int memalign_result = (block_buffer_ == 0);
"pages checked\n", thread_num_, status_, pages_copied_);
return result;
}
+
+// The list of MSRs to read from each cpu.
+// The entry order must match the MsrValues enum (kMsrTsc, kMsrAperf,
+// kMsrMperf): GetMsrs() and ComputeDelta() index this array and
+// CpuDataType.msrs[] with the same enum values.
+const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = {
+  { kMsrTscAddr, "TSC" },
+  { kMsrAperfAddr, "APERF" },
+  { kMsrMperfAddr, "MPERF" },
+};
+
+// Construct a frequency-checking thread covering num_cpus cpus.
+// freq_threshold is the minimum acceptable frequency in MHz; round is
+// the granularity (in MHz) that computed frequencies are rounded to.
+CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round)
+    : num_cpus_(num_cpus),
+      freq_threshold_(freq_threshold),
+      round_(round) {
+  sat_assert(round >= 0);
+  if (round == 0) {
+    // If rounding is off, force rounding to the nearest MHz.
+    round_ = 1;
+    round_value_ = 0.5;
+  } else {
+    // Half the rounding granularity: ComputeFrequency() adds this before
+    // truncating, so values round to the nearest multiple of round_.
+    round_value_ = round/2.0;
+  }
+}
+
+CpuFreqThread::~CpuFreqThread() {
+}
+
+// Compute the difference between the currently read MSR values and the
+// previously read values and store the results in delta. If any of the
+// values did not increase, or the TSC value is too small, returns false.
+// Otherwise, returns true.
+bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                                 CpuDataType *delta) {
+  // Loop through the msrs.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (previous->msrs[msr] > current->msrs[msr]) {
+      // Pass the register's name string to the %s specifier; passing the
+      // CpuRegisterType struct itself through varargs is undefined.
+      logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx "
+                "skipping interval\n", kCpuRegisters[msr].name,
+                previous->msrs[msr], current->msrs[msr]);
+      return false;
+    } else {
+      delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr];
+    }
+  }
+
+  // Check for TSC < 1 Mcycles over interval.
+  if (delta->msrs[kMsrTsc] < (1000 * 1000)) {
+    logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n");
+    return false;
+  }
+  timersub(&current->tv, &previous->tv, &delta->tv);
+
+  return true;
+}
+
+// Compute the change in values of the MSRs between current and previous,
+// set the frequency in MHz of the cpu. If there is an error computing
+// the delta, return false. Otherwise, return true.
+bool CpuFreqThread::ComputeFrequency(CpuDataType *current,
+                                     CpuDataType *previous, int *freq) {
+  CpuDataType delta;
+  if (!ComputeDelta(current, previous, &delta)) {
+    return false;
+  }
+
+  // Elapsed wall-clock time over the interval, in seconds.
+  double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0;
+  // TSC cycles scaled by the APERF/MPERF ratio, divided by the elapsed
+  // time, gives the effective frequency in MHz.
+  double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000
+    * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval;
+
+  // Use the rounding value to round up properly.
+  int computed = static_cast<int>(frequency + round_value_);
+  *freq = computed - (computed % round_);
+  return true;
+}
+
+// This is the task function that the thread executes.
+// Repeatedly samples the per-cpu MSRs listed in kCpuRegisters, computes
+// an effective frequency for each cpu over the sampling interval, and
+// counts an error for any cpu that falls below freq_threshold_.
+bool CpuFreqThread::Work() {
+  cpu_set_t cpuset;
+  if (!AvailableCpus(&cpuset)) {
+    logprintf(0, "Process Error: Cannot get information about the cpus.\n");
+    return false;
+  }
+
+  // Start off indicating the test is passing.
+  status_ = true;
+
+  // curr and prev index the two sample buffers in data[]; they are
+  // swapped at the bottom of every loop iteration.
+  int curr = 0;
+  int prev = 1;
+  uint32 num_intervals = 0;
+  bool paused = false;
+  bool valid;
+  bool pass = true;
+
+  vector<CpuDataType> data[2];
+  data[0].resize(num_cpus_);
+  data[1].resize(num_cpus_);
+  while (IsReadyToRun(&paused)) {
+    if (paused) {
+      // Reset the intervals and restart logic after the pause.
+      num_intervals = 0;
+    }
+    if (num_intervals == 0) {
+      // If this is the first interval, then always wait a bit before
+      // starting to collect data.
+      sat_sleep(kStartupDelay);
+    }
+
+    // Get the per cpu counters.
+    valid = true;
+    for (int cpu = 0; cpu < num_cpus_; cpu++) {
+      if (CPU_ISSET(cpu, &cpuset)) {
+        if (!GetMsrs(cpu, &data[curr][cpu])) {
+          // NOTE(review): message lacks the "Log:"/"Process Error:"
+          // prefix used by the other messages in this file — confirm.
+          logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
+          valid = false;
+          break;
+        }
+      }
+    }
+    if (!valid) {
+      // Reset the number of collected intervals since something bad happened.
+      num_intervals = 0;
+      continue;
+    }
+
+    num_intervals++;
+
+    // Only compute a delta when we have at least two intervals worth of data.
+    // (Requiring more than two guarantees both sample buffers were refilled
+    // after the most recent reset before any comparison is made.)
+    if (num_intervals > 2) {
+      for (int cpu = 0; cpu < num_cpus_; cpu++) {
+        if (CPU_ISSET(cpu, &cpuset)) {
+          int freq;
+          if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
+                                &freq)) {
+            // Reset the number of collected intervals since an unknown
+            // error occurred.
+            logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
+            num_intervals = 0;
+            break;
+          }
+          logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
+          if (freq < freq_threshold_) {
+            errorcount_++;
+            pass = false;
+            logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
+                      "threshold %d MHz.\n", cpu, freq, freq_threshold_);
+          }
+        }
+      }
+    }
+
+    sat_sleep(kIntervalPause);
+
+    // Swap the values in curr and prev (these values flip between 0 and 1).
+    curr ^= 1;
+    prev ^= 1;
+  }
+
+  return pass;
+}
+
+
+// Get the MSR values for this particular cpu and save them in data. If
+// any error is encountered, returns false. Otherwise, returns true.
+bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
+  // kCpuRegisters[] and data->msrs[] are both indexed by the MsrValues
+  // enum, so each register value lands in its matching slot.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) {
+      return false;
+    }
+  }
+  // Save the time at which we acquired these values.
+  gettimeofday(&data->tv, NULL);
+
+  return true;
+}
+
+// Returns true if this test can run on the current machine. Otherwise,
+// returns false.
+// All of the CPUID feature checks below must pass: TSC present, invariant
+// (non-stop) TSC, and APERF/MPERF MSR support.
+bool CpuFreqThread::CanRun() {
+#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+  unsigned int eax, ebx, ecx, edx;
+
+  // Check that the TSC feature is supported.
+  // TSC support is advertised by CPUID.EAX=0x1: EDX.bit5.
+  // This check is valid for both Intel and AMD.
+  eax = 1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (!(edx & (1 << 5))) {
+    logprintf(0, "Process Error: No TSC support.\n");
+    return false;
+  }
+
+  // Check the highest extended function level supported.
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000000;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x80000007) {
+    logprintf(0, "Process Error: No invariant TSC support.\n");
+    return false;
+  }
+
+  // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000007;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((edx & (1 << 8)) == 0) {
+    logprintf(0, "Process Error: No non-stop TSC support.\n");
+    return false;
+  }
+
+  // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+  // This check is valid for both Intel and AMD.
+  eax = 0x6;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((ecx & 1) == 0) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+  return true;
+#else
+  logprintf(0, "Process Error: "
+            "cpu_freq_test is only supported on X86 processors.\n");
+  return false;
+#endif
+}
// Global Datastruture shared by the Cache Coherency Worker Threads.
struct cc_cacheline_data {
- int *num;
+ char *num;
};
// Typical usage:
// ResumeWorkers() or StopWorkers() has been called. Number of distinct
// calling threads must match the worker count (see AddWorkers() and
// RemoveSelf()).
- bool ContinueRunning();
+ bool ContinueRunning(bool *paused);
- // TODO(matthewb): Is this functionality really necessary? Remove it if not.
- //
// This is a hack! It's like ContinueRunning(), except it won't pause. If
// any worker threads use this exclusively in place of ContinueRunning() then
// PauseWorkers() should never be used!
// do {
// // work.
// } while (IsReadyToRun());
- virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); }
- // TODO(matthewb): Is this function really necessary? Remove it if not.
- //
+ virtual bool IsReadyToRun(bool *paused = NULL) {
+ return worker_status_->ContinueRunning(paused);
+ }
+
// Like IsReadyToRun(), except it won't pause.
virtual bool IsReadyToRunNoPause() {
return worker_status_->ContinueRunningNoPause();
CpuCacheCoherencyThread(cc_cacheline_data *cc_data,
int cc_cacheline_count_,
int cc_thread_num_,
+ int cc_thread_count_,
int cc_inc_count_);
virtual bool Work();
protected:
+  // Used by the simple random number generator as a shift feedback;
+  // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a
+  // pseudorandom cycle of period 2^64-1.
+  static const uint64 kRandomPolynomial = 0xD800000000000000ULL;
+  // A very simple pseudorandom generator that can be inlined and use
+  // registers, to keep the CC test loop tight and focused.
+  static uint64 SimpleRandom(uint64 seed);
+
cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline.
int cc_local_num_; // Local counter for each thread.
int cc_cacheline_count_; // Number of cache lines to operate on.
int cc_thread_num_; // The integer id of the thread which is
// used as an index into the integer array
// of the cacheline datastructure.
+ int cc_thread_count_; // Total number of threads being run, for
+ // calculations mixing up cache line access.
int cc_inc_count_; // Number of times to increment the counter.
private:
DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread);
};
+// Worker thread to check that the frequency of every cpu does not go below a
+// certain threshold.
+class CpuFreqThread : public WorkerThread {
+ public:
+  CpuFreqThread(int num_cpus, int freq_threshold, int round);
+  ~CpuFreqThread();
+
+  // This is the task function that the thread executes.
+  virtual bool Work();
+
+  // Returns true if this test can run on the current machine. Otherwise,
+  // returns false.
+  static bool CanRun();
+
+ private:
+  static const int kIntervalPause = 10;   // The number of seconds to pause
+                                          // between acquiring the MSR data.
+  static const int kStartupDelay = 5;     // The number of seconds to wait
+                                          // before acquiring MSR data.
+  static const int kMsrTscAddr = 0x10;    // The address of the TSC MSR.
+  static const int kMsrAperfAddr = 0xE8;  // The address of the APERF MSR.
+  static const int kMsrMperfAddr = 0xE7;  // The address of the MPERF MSR.
+
+  // The index values into the CpuDataType.msr[] array.
+  enum MsrValues {
+    kMsrTsc = 0,       // MSR index 0 = TSC.
+    kMsrAperf = 1,     // MSR index 1 = APERF.
+    kMsrMperf = 2,     // MSR index 2 = MPERF.
+    kMsrLast,          // Last MSR index.
+  };
+
+  typedef struct {
+    uint32 msr;         // The address of the MSR.
+    const char *name;   // A human readable string for the MSR.
+  } CpuRegisterType;
+
+  typedef struct {
+    uint64 msrs[kMsrLast];  // The values of the MSRs.
+    struct timeval tv;      // The time at which the MSRs were read.
+  } CpuDataType;
+
+  // The set of MSR addresses and register names, indexed by MsrValues.
+  static const CpuRegisterType kCpuRegisters[kMsrLast];
+
+  // Compute the change in values of the MSRs between current and previous,
+  // set the frequency in MHz of the cpu. If there is an error computing
+  // the delta, return false. Otherwise, return true.
+  bool ComputeFrequency(CpuDataType *current, CpuDataType *previous,
+                        int *frequency);
+
+  // Get the MSR values for this particular cpu and save them in data. If
+  // any error is encountered, returns false. Otherwise, returns true.
+  bool GetMsrs(int cpu, CpuDataType *data);
+
+  // Compute the difference between the currently read MSR values and the
+  // previously read values and store the results in delta. If any of the
+  // values did not increase, or the TSC value is too small, returns false.
+  // Otherwise, returns true.
+  bool ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                    CpuDataType *delta);
+
+  // The total number of cpus on the system.
+  int num_cpus_;
+
+  // The minimum frequency that each cpu must operate at (in MHz).
+  int freq_threshold_;
+
+  // The value to round the computed frequency to.
+  int round_;
+
+  // Precomputed value to add to the frequency to do the rounding.
+  double round_value_;
+
+  DISALLOW_COPY_AND_ASSIGN(CpuFreqThread);
+};
+
#endif // STRESSAPPTEST_WORKER_H_
.TP
.B \-\-cc_line_count <number>
-Mumber of cache line sized datastructures to allocate for the cache coherency
+Number of cache line sized datastructures to allocate for the cache coherency
threads to operate.
+.TP
+.B \-\-cc_line_size <number>
+Size of cache line to use as the basis for cache coherency test data
+structures.
+
.TP
.B \-\-cc_test
Do the cache coherency testing.