From: ewout Date: Tue, 10 Sep 2013 21:27:49 +0000 (+0000) Subject: New frequency test, fixed error accounting, added logging timestamps, and miscellaneo... X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?a=commitdiff_plain;h=2ea87b7996f4f433d5d946eaf8f0d2f6fd18c144;p=stressapptest New frequency test, fixed error accounting, added logging timestamps, and miscellaneous smaller changes. * Added a CPU Frequency test for select X86 processors to verify a minimum frequency is maintained during non-pause periods. * Fixed the error accounting in WorkerThread::CheckRegion if more than 128 miscompares are found and when block errors are detected. * Updated the logger to include timestamps and the associated timezone. * Moved from apicid() to sched_getcpu() for determining the core ID. * Added the ability to reserve a specified amount of memory. This can override the requested memory allocation. * If not using POSIX shared memory or hugepages, explicitly mmap memory if the pagesize is 4kB otherwise use memalign. * Removed the OSLayer's unused PCI device handling. * Numerous refactoring changes. 
--- diff --git a/configure.ac b/configure.ac index ca10966..6f09eb9 100644 --- a/configure.ac +++ b/configure.ac @@ -5,10 +5,10 @@ AC_ARG_WITH(static, [ --with-static enable static linking]) if test "$with_static" == "yes" then - AC_MSG_NOTICE([Compiling with staticaly linked libraries.]) - LIBS="$LIBS -static" + AC_MSG_NOTICE([Compiling with staticaly linked libraries.]) + LIBS="$LIBS -static" else - AC_MSG_NOTICE([Compiling with dynamically linked libraries.]) + AC_MSG_NOTICE([Compiling with dynamically linked libraries.]) fi AC_CANONICAL_HOST diff --git a/src/Makefile.am b/src/Makefile.am index 2179b42..16f539d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,6 +28,7 @@ HFILES += error_diag.h HFILES += disk_blocks.h HFILES += adler32memcpy.h HFILES += logger.h +HFILES += clock.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) findmask_SOURCES = findmask.c findmask.inc diff --git a/src/clock.h b/src/clock.h new file mode 100644 index 0000000..4204188 --- /dev/null +++ b/src/clock.h @@ -0,0 +1,29 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: cferris + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_CLOCK_H_ // NOLINT +#define STRESSAPPTEST_CLOCK_H_ + +#include + +// This class implements a clock that can be overriden for unit tests. 
+class Clock { + public: + virtual ~Clock() {} + + virtual time_t Now() { return time(NULL); } +}; + +#endif // STRESSAPPTEST_CLOCK_H_ NOLINT diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc index c7860b0..60018f9 100644 --- a/src/disk_blocks.cc +++ b/src/disk_blocks.cc @@ -14,38 +14,51 @@ // Thread-safe container of disk blocks -#include - // This file must work with autoconf on its public version, // so these includes are correct. #include "disk_blocks.h" -DiskBlockTable::DiskBlockTable() { - nelems_ = 0; +#include + +// BlockData +BlockData::BlockData() : address_(0), size_(0), + references_(0), initialized_(false), + pattern_(NULL) { + pthread_mutex_init(&data_mutex_, NULL); +} + +BlockData::~BlockData() { + pthread_mutex_destroy(&data_mutex_); +} + +void BlockData::set_initialized() { + pthread_mutex_lock(&data_mutex_); + initialized_ = true; + pthread_mutex_unlock(&data_mutex_); +} + +bool BlockData::initialized() const { + pthread_mutex_lock(&data_mutex_); + bool initialized = initialized_; + pthread_mutex_unlock(&data_mutex_); + return initialized; +} + +// DiskBlockTable +DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0), + device_name_(""), device_sectors_(0), + segment_size_(0), size_(0) { pthread_mutex_init(&data_mutex_, NULL); pthread_mutex_init(¶meter_mutex_, NULL); pthread_cond_init(&data_condition_, NULL); } DiskBlockTable::~DiskBlockTable() { - CleanTable(); pthread_mutex_destroy(&data_mutex_); pthread_mutex_destroy(¶meter_mutex_); pthread_cond_destroy(&data_condition_); } -void DiskBlockTable::CleanTable() { - pthread_mutex_lock(&data_mutex_); - for (map::iterator it = - addr_to_block_.begin(); it != addr_to_block_.end(); ++it) { - delete it->second; - } - addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end()); - nelems_ = 0; - pthread_cond_broadcast(&data_condition_); - pthread_mutex_unlock(&data_mutex_); -} - // 64-bit non-negative random number generator. 
Stolen from // depot/google3/base/tracecontext_unittest.cc. int64 DiskBlockTable::Random64() { @@ -58,28 +71,27 @@ int64 DiskBlockTable::Random64() { return -x; } -int64 DiskBlockTable::NumElems() { - unsigned int nelems; +uint64 DiskBlockTable::Size() { pthread_mutex_lock(&data_mutex_); - nelems = nelems_; + uint64 size = size_; pthread_mutex_unlock(&data_mutex_); - return nelems; + return size; } void DiskBlockTable::InsertOnStructure(BlockData *block) { - int64 address = block->GetAddress(); + int64 address = block->address(); StorageData *sd = new StorageData(); sd->block = block; - sd->pos = nelems_; + sd->pos = size_; // Creating new block ... pthread_mutex_lock(&data_mutex_); - if (pos_to_addr_.size() <= nelems_) { + if (pos_to_addr_.size() <= size_) { pos_to_addr_.insert(pos_to_addr_.end(), address); } else { - pos_to_addr_[nelems_] = address; + pos_to_addr_[size_] = address; } - addr_to_block_.insert(std::make_pair(address, sd)); - nelems_++; + addr_to_block_[address] = sd; + size_++; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } @@ -87,26 +99,28 @@ void DiskBlockTable::InsertOnStructure(BlockData *block) { int DiskBlockTable::RemoveBlock(BlockData *block) { // For write threads, check the reference counter and remove // it from the structure. - int64 address = block->GetAddress(); + int64 address = block->address(); AddrToBlockMap::iterator it = addr_to_block_.find(address); int ret = 1; if (it != addr_to_block_.end()) { int curr_pos = it->second->pos; - int last_pos = nelems_ - 1; + int last_pos = size_ - 1; AddrToBlockMap::iterator last_it = addr_to_block_.find( pos_to_addr_[last_pos]); - sat_assert(nelems_ > 0); + sat_assert(size_ > 0); sat_assert(last_it != addr_to_block_.end()); - // Everything is fine, updating ... + // Everything is fine, removing block from table. 
pthread_mutex_lock(&data_mutex_); pos_to_addr_[curr_pos] = pos_to_addr_[last_pos]; last_it->second->pos = curr_pos; delete it->second; addr_to_block_.erase(it); - nelems_--; + size_--; block->DecreaseReferenceCounter(); if (block->GetReferenceCounter() == 0) delete block; + else if (block->GetReferenceCounter() < 0) + ret = 0; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } else { @@ -116,18 +130,16 @@ int DiskBlockTable::RemoveBlock(BlockData *block) { } int DiskBlockTable::ReleaseBlock(BlockData *block) { - // If is a random thread, just check the reference counter. + // If caller is a random thread, just check the reference counter. int ret = 1; pthread_mutex_lock(&data_mutex_); int references = block->GetReferenceCounter(); - if (references > 0) { - if (references == 1) - delete block; - else - block->DecreaseReferenceCounter(); - } else { + if (references == 1) + delete block; + else if (references > 0) + block->DecreaseReferenceCounter(); + else ret = 0; - } pthread_mutex_unlock(&data_mutex_); return ret; } @@ -135,13 +147,13 @@ int DiskBlockTable::ReleaseBlock(BlockData *block) { BlockData *DiskBlockTable::GetRandomBlock() { struct timespec ts; struct timeval tp; - int result = 0; gettimeofday(&tp, NULL); ts.tv_sec = tp.tv_sec; ts.tv_nsec = tp.tv_usec * 1000; ts.tv_sec += 2; // Wait for 2 seconds. 
+ int result = 0; pthread_mutex_lock(&data_mutex_); - while (!nelems_ && result != ETIMEDOUT) { + while (!size_ && result != ETIMEDOUT) { result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts); } if (result == ETIMEDOUT) { @@ -149,13 +161,13 @@ BlockData *DiskBlockTable::GetRandomBlock() { return NULL; } else { int64 random_number = Random64(); - int64 random_pos = random_number % nelems_; + int64 random_pos = random_number % size_; int64 address = pos_to_addr_[random_pos]; AddrToBlockMap::const_iterator it = addr_to_block_.find(address); sat_assert(it != addr_to_block_.end()); BlockData *b = it->second->block; // A block is returned only if its content is written on disk. - if (b->BlockIsInitialized()) { + if (b->initialized()) { b->IncreaseReferenceCounter(); } else { b = NULL; @@ -165,45 +177,38 @@ BlockData *DiskBlockTable::GetRandomBlock() { } } -void DiskBlockTable::SetParameters( - int sector_size, int write_block_size, int64 device_sectors, - int64 segment_size, string device_name) { +void DiskBlockTable::SetParameters(int sector_size, + int write_block_size, + int64 device_sectors, + int64 segment_size, + const string& device_name) { + sat_assert(size_ == 0); pthread_mutex_lock(¶meter_mutex_); sector_size_ = sector_size; write_block_size_ = write_block_size; device_sectors_ = device_sectors; segment_size_ = segment_size; device_name_ = device_name; - CleanTable(); pthread_mutex_unlock(¶meter_mutex_); } BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { int64 sector = 0; BlockData *block = new BlockData(); - bool good_sequence = false; - int num_sectors; - if (block == NULL) { logprintf(0, "Process Error: Unable to allocate memory " "for sector data for disk %s.\n", device_name_.c_str()); return NULL; } - pthread_mutex_lock(¶meter_mutex_); - sat_assert(device_sectors_ != 0); - // Align the first sector with the beginning of a write block - num_sectors = write_block_size_ / sector_size_; - + int num_sectors = write_block_size_ / 
sector_size_; for (int i = 0; i < kBlockRetry && !good_sequence; i++) { good_sequence = true; - // Use the entire disk or a small segment of the disk to allocate the first // sector in the block from. - if (segment_size_ == -1) { sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % ( device_sectors_ / num_sectors); @@ -213,7 +218,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { segment_size_ / num_sectors); sector *= num_sectors; sector += segment * segment_size_; - // Make sure the block is within the segment. if (sector + num_sectors > (segment + 1) * segment_size_) { good_sequence = false; @@ -229,7 +233,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { // now aligned to the write_block_size, it is not necessary // to check each sector, just the first block (a sector // overlap will never occur). - pthread_mutex_lock(&data_mutex_); if (addr_to_block_.find(sector) != addr_to_block_.end()) { good_sequence = false; @@ -238,7 +241,8 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { } if (good_sequence) { - block->SetParameters(sector, write_block_size_); + block->set_address(sector); + block->set_size(write_block_size_); block->IncreaseReferenceCounter(); InsertOnStructure(block); } else { @@ -248,66 +252,5 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { block = NULL; } pthread_mutex_unlock(¶meter_mutex_); - return block; } - -// BlockData - -BlockData::BlockData() { - addr_ = 0; - size_ = 0; - references_ = 0; - initialized_ = false; - pthread_mutex_init(&data_mutex_, NULL); -} - -BlockData::~BlockData() { - pthread_mutex_destroy(&data_mutex_); -} - -void BlockData::SetParameters(int64 address, int64 size) { - addr_ = address; - size_ = size; -} - -void BlockData::IncreaseReferenceCounter() { - references_++; -} - -void BlockData::DecreaseReferenceCounter() { - references_--; -} - -int BlockData::GetReferenceCounter() { - return references_; -} - -void BlockData::SetBlockAsInitialized() { - 
pthread_mutex_lock(&data_mutex_); - initialized_ = true; - pthread_mutex_unlock(&data_mutex_); -} - -bool BlockData::BlockIsInitialized() { - pthread_mutex_lock(&data_mutex_); - bool initialized = initialized_; - pthread_mutex_unlock(&data_mutex_); - return initialized; -} - -int64 BlockData::GetAddress() { - return addr_; -} - -int64 BlockData::GetSize() { - return size_; -} - -Pattern *BlockData::GetPattern() { - return pattern_; -} - -void BlockData::SetPattern(Pattern *p) { - pattern_ = p; -} diff --git a/src/disk_blocks.h b/src/disk_blocks.h index cb634c9..638ee9f 100644 --- a/src/disk_blocks.h +++ b/src/disk_blocks.h @@ -25,87 +25,146 @@ #include #include #include -// This file must work with autoconf on its public version, -// so these includes are correct. -#include "pattern.h" + +#include "sattypes.h" + +class Pattern; // Data about a block written to disk so that it can be verified later. +// Thread-unsafe, must be used with locks on non-const methods, +// except for initialized accessor/mutator, which are thread-safe +// (and in fact, is the only method supposed to be accessed from +// someone which is not the thread-safe DiskBlockTable). class BlockData { public: BlockData(); ~BlockData(); - void SetParameters(int64 address, int64 size); - void IncreaseReferenceCounter(); - void DecreaseReferenceCounter(); - int GetReferenceCounter(); - void SetBlockAsInitialized(); - bool BlockIsInitialized(); - int64 GetAddress(); - int64 GetSize(); - void SetPattern(Pattern *p); - Pattern *GetPattern(); - protected: - int64 addr_; // address of first sector in block - int64 size_; // size of block - int references_; // reference counter - bool initialized_; // flag indicating the block was written on disk + + // These are reference counters used to control how many + // threads currently have a copy of this particular block. 
+ void IncreaseReferenceCounter() { references_++; } + void DecreaseReferenceCounter() { references_--; } + int GetReferenceCounter() const { return references_; } + + // Controls whether the block was written on disk or not. + // Once written, you cannot "un-written" then without destroying + // this object. + void set_initialized(); + bool initialized() const; + + // Accessor methods for some data related to blocks. + void set_address(uint64 address) { address_ = address; } + uint64 address() const { return address_; } + void set_size(uint64 size) { size_ = size; } + uint64 size() const { return size_; } + void set_pattern(Pattern *p) { pattern_ = p; } + Pattern *pattern() { return pattern_; } + private: + uint64 address_; // Address of first sector in block + uint64 size_; // Size of block + int references_; // Reference counter + bool initialized_; // Flag indicating the block was written on disk Pattern *pattern_; - pthread_mutex_t data_mutex_; + mutable pthread_mutex_t data_mutex_; DISALLOW_COPY_AND_ASSIGN(BlockData); }; -// Disk Block table - store data from blocks to be write / read by -// a DiskThread +// A thread-safe table used to store block data and control access +// to these blocks, letting several threads read and write blocks on +// disk. class DiskBlockTable { public: DiskBlockTable(); virtual ~DiskBlockTable(); - // Get Number of elements stored on table - int64 NumElems(); - // Clean all table data - void CleanTable(); - // Get a random block from the list. Only returns if a element - // is available (consider that other thread must have added them. - BlockData *GetRandomBlock(); - // Set all initial parameters. Assumes all existent data is + // Returns number of elements stored on table. + uint64 Size(); + + // Sets all initial parameters. Assumes all existent data is // invalid and, therefore, must be removed. 
void SetParameters(int sector_size, int write_block_size, int64 device_sectors, int64 segment_size, - string device_name); - // Return a new block in a unused address. + const string& device_name); + + // During the regular execution, there will be 2 types of threads: + // - Write thread: gets a large number of blocks using GetUnusedBlock, + // writes them on disk (if on destructive mode), + // reads block content ONCE from disk and them removes + // the block from queue with RemoveBlock. After a removal a + // block is not available for read threads, but it is + // only removed from memory if there is no reference for + // this block. Note that a write thread also counts as + // a reference. + // - Read threads: get one block at a time (if available) with + // GetRandomBlock, reads its content from disk, + // checking whether it is correct or not, and releases + // (Using ReleaseBlock) the block to be erased by the + // write threads. Since several read threads are allowed + // to read the same block, a reference counter is used to + // control when the block can be REALLY erased from + // memory, and all memory management is made by a + // DiskBlockTable instance. + + // Returns a new block in a unused address. Does not + // grant ownership of the pointer to the caller + // (use RemoveBlock to delete the block from memory instead). BlockData *GetUnusedBlock(int64 segment); - // Remove block from structure (called by write threads) + + // Removes block from structure (called by write threads). Returns + // 1 if successful, 0 otherwise. int RemoveBlock(BlockData *block); - // Release block to be erased (called by random threads) - int ReleaseBlock(BlockData *block); - protected: + // Gets a random block from the list. Only returns if an element + // is available (a write thread has got this block, written it on disk, + // and set this block as initialized). 
Does not grant ownership of the + // pointer to the caller (use RemoveBlock to delete the block from + // memory instead). + BlockData *GetRandomBlock(); - void InsertOnStructure(BlockData *block); - // Generate a random 64-bit integer (virtual so it could be - // override by the tests) - virtual int64 Random64(); + // Releases block to be erased (called by random threads). Returns + // 1 if successful, 0 otherwise. + int ReleaseBlock(BlockData *block); + protected: struct StorageData { BlockData *block; int pos; }; - - static const int kBlockRetry = 100; // Number of retries to allocate - // sectors. - typedef map AddrToBlockMap; typedef vector PosToAddrVector; + + // Inserts block in structure, used in tests and by other methods. + void InsertOnStructure(BlockData *block); + + // Generates a random 64-bit integer. + // Virtual method so it can be overridden by the tests. + virtual int64 Random64(); + + // Accessor methods for testing. + const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; } + const AddrToBlockMap& addr_to_block() const { return addr_to_block_; } + + int sector_size() const { return sector_size_; } + int write_block_size() const { return write_block_size_; } + const string& device_name() const { return device_name_; } + int64 device_sectors() const { return device_sectors_; } + int64 segment_size() const { return segment_size_; } + + private: + // Number of retries to allocate sectors. + static const int kBlockRetry = 100; + // Actual tables. 
PosToAddrVector pos_to_addr_; AddrToBlockMap addr_to_block_; - uint64 nelems_; - int sector_size_; // Sector size, in bytes - int write_block_size_; // Block size, in bytes - string device_name_; // Device name - int64 device_sectors_; // Number of sectors in device - int64 segment_size_; // Segment size, in bytes + + // Configuration parameters for block selection + int sector_size_; // Sector size, in bytes + int write_block_size_; // Block size, in bytes + string device_name_; // Device name + int64 device_sectors_; // Number of sectors in device + int64 segment_size_; // Segment size in bytes + uint64 size_; // Number of elements on table pthread_mutex_t data_mutex_; pthread_cond_t data_condition_; pthread_mutex_t parameter_mutex_; diff --git a/src/findmask.c b/src/findmask.c index d8ec300..1b10988 100644 --- a/src/findmask.c +++ b/src/findmask.c @@ -38,6 +38,7 @@ * current progress. */ +#include #include #include #include @@ -106,7 +107,7 @@ void* thread_func(void* arg) { if (a < NOISE) b = a; if (b < NOISE) { - printf("Found mask with just %d deviations: 0x%llx\n", b, mask); + printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask); fflush(stdout); } @@ -118,7 +119,8 @@ void* thread_func(void* arg) { } void signal_handler(int signum) { - printf("Received signal... currently evaluating mask 0x%llx!\n", lastmask); + printf("Received signal... 
currently evaluating mask 0x%" PRIx64 "!\n", + lastmask); fflush(stdout); } diff --git a/src/logger.cc b/src/logger.cc index e4ecb03..f13e003 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,20 @@ void Logger::VLogF(int priority, const char *format, va_list args) { return; } char buffer[4096]; - int length = vsnprintf(buffer, sizeof buffer, format, args); - if (static_cast(length) >= sizeof buffer) { - length = sizeof buffer; - buffer[sizeof buffer - 1] = '\n'; + size_t length = 0; + if (log_timestamps_) { + time_t raw_time; + time(&raw_time); + struct tm time_struct; + localtime_r(&raw_time, &time_struct); + length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ", + &time_struct); + LOGGER_ASSERT(length); // Catch if the buffer is set too small. + } + length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args); + if (length >= sizeof(buffer)) { + length = sizeof(buffer); + buffer[sizeof(buffer) - 1] = '\n'; } QueueLogLine(new string(buffer, length)); } @@ -52,19 +63,30 @@ void Logger::StartThread() { } void Logger::StopThread() { - LOGGER_ASSERT(thread_running_); + // Allow this to be called before the thread has started. 
+ if (!thread_running_) { + return; + } thread_running_ = false; - LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + int retval = pthread_mutex_lock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); bool need_cond_signal = queued_lines_.empty(); queued_lines_.push_back(NULL); - LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); + retval = pthread_mutex_unlock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); if (need_cond_signal) { - LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_)); + retval = pthread_cond_signal(&queued_lines_cond_); + LOGGER_ASSERT(0 == retval); } - LOGGER_ASSERT(0 == pthread_join(thread_, NULL)); + retval = pthread_join(thread_, NULL); + LOGGER_ASSERT(0 == retval); } -Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) { +Logger::Logger() + : verbosity_(20), + log_fd_(-1), + thread_running_(false), + log_timestamps_(true) { LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL)); @@ -94,19 +116,15 @@ void Logger::QueueLogLine(string *line) { LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); } -namespace { -void WriteToFile(const string& line, int fd) { - LOGGER_ASSERT(write(fd, line.data(), line.size()) == - static_cast(line.size())); -} -} - void Logger::WriteAndDeleteLogLine(string *line) { LOGGER_ASSERT(line != NULL); + ssize_t bytes_written; if (log_fd_ >= 0) { - WriteToFile(*line, log_fd_); + bytes_written = write(log_fd_, line->data(), line->size()); + LOGGER_ASSERT(bytes_written == static_cast(line->size())); } - WriteToFile(*line, 1); + bytes_written = write(STDOUT_FILENO, line->data(), line->size()); + LOGGER_ASSERT(bytes_written == static_cast(line->size())); delete line; } diff --git a/src/logger.h b/src/logger.h index 1d70107..21b3c6b 100644 --- a/src/logger.h +++ b/src/logger.h @@ -62,7 +62,7 @@ class Logger { // 
Lines with a priority numerically greater than this will not be logged. // May not be called while multiple threads are running. - void SetVerbosity(int verbosity) { + virtual void SetVerbosity(int verbosity) { verbosity_ = verbosity; } @@ -72,17 +72,22 @@ class Logger { // Args: // log_fd: The file descriptor to write to. Will not be closed by this // object. - void SetLogFd(int log_fd) { + virtual void SetLogFd(int log_fd) { LOGGER_ASSERT(log_fd >= 0); log_fd_ = log_fd; } // Set output to be written to stdout only. This is the default mode. May // not be called while multiple threads are running. - void SetStdoutOnly() { + virtual void SetStdoutOnly() { log_fd_ = -1; } + // Enable or disable logging of timestamps. + void SetTimestampLogging(bool log_ts_enabled) { + log_timestamps_ = log_ts_enabled; + } + // Logs a line, with a vprintf(3)-like interface. This will block on writing // the line to stdout/disk iff the dedicated logging thread is not running. // This will block on adding the line to the queue if doing so would exceed @@ -104,11 +109,12 @@ class Logger { // before this returns. Waits for the thread to finish before returning. void StopThread(); - private: + protected: Logger(); - ~Logger(); + virtual ~Logger(); + private: // Args: // line: Must be non-NULL. This function takes ownership of it. void QueueLogLine(string *line); @@ -127,6 +133,7 @@ class Logger { int verbosity_; int log_fd_; bool thread_running_; + bool log_timestamps_; vector queued_lines_; // This doubles as a mutex for log_fd_ when the logging thread is not running. pthread_mutex_t queued_lines_mutex_; diff --git a/src/os.cc b/src/os.cc index 7cae23b..6358398 100644 --- a/src/os.cc +++ b/src/os.cc @@ -48,6 +48,7 @@ // so these includes are correct. #include "sattypes.h" #include "error_diag.h" +#include "clock.h" // OsLayer initialization. 
OsLayer::OsLayer() { @@ -55,10 +56,12 @@ OsLayer::OsLayer() { testmemsize_ = 0; totalmemsize_ = 0; min_hugepages_bytes_ = 0; + reserve_mb_ = 0; normal_mem_ = true; use_hugepages_ = false; use_posix_shm_ = false; dynamic_mapped_shmem_ = false; + mmapped_allocation_ = false; shmid_ = 0; time_initialized_ = 0; @@ -79,17 +82,25 @@ OsLayer::OsLayer() { has_sse2_ = false; use_flush_page_cache_ = false; + + clock_ = NULL; } // OsLayer cleanup. OsLayer::~OsLayer() { if (error_diagnoser_) delete error_diagnoser_; + if (clock_) + delete clock_; } // OsLayer initialization. bool OsLayer::Initialize() { - time_initialized_ = time(NULL); + if (!clock_) { + clock_ = new Clock(); + } + + time_initialized_ = clock_->Now(); // Detect asm support. GetFeatures(); @@ -130,7 +141,7 @@ int OsLayer::AddressMode() { // Translates user virtual to physical address. uint64 OsLayer::VirtualToPhysical(void *vaddr) { uint64 frame, shift; - off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8; + off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8; int fd = open(kPagemapPath, O_RDONLY); // /proc/self/pagemap is available in kernel >= 2.6.25 if (fd < 0) @@ -169,22 +180,10 @@ list OsLayer::FindFileDevices() { // Get HW core features from cpuid instruction. void OsLayer::GetFeatures() { #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - // CPUID features documented at: - // http://www.sandpile.org/ia32/cpuid.htm - int ax, bx, cx, dx; - __asm__ __volatile__ ( -# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) - "xchg %%ebx, %%esi;" - "cpuid;" - "xchg %%esi, %%ebx;" - : "=S" (bx), -# else - "cpuid;" - : "=b" (bx), -# endif - "=a" (ax), "=c" (cx), "=d" (dx) : "a" (1)); - has_clflush_ = (dx >> 19) & 1; - has_sse2_ = (dx >> 26) & 1; + unsigned int eax = 1, ebx, ecx, edx; + cpuid(&eax, &ebx, &ecx, &edx); + has_clflush_ = (edx >> 19) & 1; + has_sse2_ = (edx >> 26) & 1; logprintf(9, "Log: has clflush: %s, has sse2: %s\n", has_clflush_ ? 
"true" : "false", @@ -244,8 +243,9 @@ bool OsLayer::FlushPageCache(void) { void OsLayer::Flush(void *vaddr) { // Use the generic flush. This function is just so we can override // this if we are so inclined. - if (has_clflush_) - FastFlush(vaddr); + if (has_clflush_) { + OsLayer::FastFlush(vaddr); + } } @@ -266,15 +266,14 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, // all address bits in the 'channel_hash' mask, with repeated 'channel_width_' // blocks with bits distributed from each chip in that channel. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { - static const string unknown = "DIMM Unknown"; if (!channels_) { - snprintf(buf, len, "%s", unknown.c_str()); - return 0; + snprintf(buf, len, "DIMM Unknown"); + return -1; } // Find channel by XORing address bits in channel_hash mask. - uint32 low = (uint32)(addr & channel_hash_); - uint32 high = (uint32)((addr & channel_hash_) >> 32); + uint32 low = static_cast(addr & channel_hash_); + uint32 high = static_cast((addr & channel_hash_) >> 32); vector& channel = (*channels_)[ __builtin_parity(high) ^ __builtin_parity(low)]; @@ -342,9 +341,17 @@ string OsLayer::FindCoreMaskFormat(int32 region) { // Report an error in an easily parseable way. bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) { - time_t now = time(NULL); + time_t now = clock_->Now(); int ttf = now - time_initialized_; - logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf); + if (strlen(symptom) && strlen(part)) { + logprintf(0, "Report Error: %s : %s : %d : %ds\n", + symptom, part, count, ttf); + } else { + // Log something so the error still shows up, but this won't break the + // parser. + logprintf(0, "Warning: Invalid Report Error: " + "%s : %s : %d : %ds\n", symptom, part, count, ttf); + } return true; } @@ -408,12 +415,31 @@ int64 OsLayer::FindFreeMemSize() { // // TODO(nsanders): is there a more correct way to determine target // memory size? 
- if (hugepagesize > 0 && min_hugepages_bytes_ > 0) { - minsize = min_hugepages_bytes_; - } else if (physsize < 2048LL * kMegabyte) { - minsize = ((pages * 85) / 100) * pagesize; + if (hugepagesize > 0) { + if (min_hugepages_bytes_ > 0) { + minsize = min_hugepages_bytes_; + } else { + minsize = hugepagesize; + } } else { - minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + if (physsize < 2048LL * kMegabyte) { + minsize = ((pages * 85) / 100) * pagesize; + } else { + minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + } + // Make sure that at least reserve_mb_ is left for the system. + if (reserve_mb_ > 0) { + int64 totalsize = pages * pagesize; + int64 reserve_kb = reserve_mb_ * kMegabyte; + if (reserve_kb > totalsize) { + logprintf(0, "Procedural Error: %lld is bigger than the total memory " + "available %lld\n", reserve_kb, totalsize); + } else if (reserve_kb > totalsize - minsize) { + logprintf(5, "Warning: Overriding memory to use: original %lld, " + "current %lld\n", minsize, totalsize - reserve_kb); + minsize = totalsize - reserve_kb; + } + } } // Use hugepage sizing if available. @@ -484,7 +510,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { "'sudo mount -o remount,size=100\% /dev/shm.'\n"); } else if (hugepagesize >= length) { prefer_hugepages = true; - logprintf(3, "Log: Prefer using hugepace allocation.\n"); + logprintf(3, "Log: Prefer using hugepage allocation.\n"); } else { logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); } @@ -507,7 +533,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { break; } - shmaddr = shmat(shmid, NULL, NULL); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == reinterpret_cast(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -564,7 +590,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { // Do a full mapping here otherwise. 
shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, - shm_object, NULL); + shm_object, 0); if (shmaddr == reinterpret_cast(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -589,18 +615,32 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { } while (0); shm_unlink("/stressapptest"); } -#endif // HAVE_SYS_SHM_H +#endif // HAVE_SYS_SHM_H if (!use_hugepages_ && !use_posix_shm_) { - // Use memalign to ensure that blocks are aligned enough for disk direct IO. - buf = static_cast(memalign(4096, length)); - if (buf) { - logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); - } else { - logprintf(0, "Process Error: memalign returned 0\n"); - if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { - logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " - "bit process. Please setup shared memory.\n"); + // If the page size is what SAT is expecting explicitly perform mmap() + // allocation. + if (sysconf(_SC_PAGESIZE) >= 4096) { + void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map_buf != MAP_FAILED) { + buf = map_buf; + mmapped_allocation_ = true; + logprintf(0, "Log: Using mmap() allocation at %p.\n", buf); + } + } + if (!mmapped_allocation_) { + // Use memalign to ensure that blocks are aligned enough for disk direct + // IO. + buf = static_cast(memalign(4096, length)); + if (buf) { + logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); + } else { + logprintf(0, "Process Error: memalign returned 0\n"); + if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { + logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " + "bit process. 
Please setup shared memory.\n"); + } } } } @@ -628,6 +668,8 @@ void OsLayer::FreeTestMem() { munmap(testmem_, testmemsize_); } close(shmid_); + } else if (mmapped_allocation_) { + munmap(testmem_, testmemsize_); } else { free(testmem_); } @@ -849,7 +891,9 @@ uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) { bool OsLayer::CpuStressWorkload() { double float_arr[100]; double sum = 0; +#ifdef HAVE_RAND_R unsigned int seed = 12345; +#endif // Initialize array with random numbers. for (int i = 0; i < 100; i++) { @@ -858,8 +902,9 @@ bool OsLayer::CpuStressWorkload() { if (rand_r(&seed) % 2) float_arr[i] *= -1.0; #else - float_arr[i] = rand(); - if (rand() % 2) + srand(time(NULL)); + float_arr[i] = rand(); // NOLINT + if (rand() % 2) // NOLINT float_arr[i] *= -1.0; #endif } @@ -877,82 +922,3 @@ bool OsLayer::CpuStressWorkload() { logprintf(12, "Log: I'm Feeling Lucky!\n"); return true; } - -PCIDevices OsLayer::GetPCIDevices() { - PCIDevices device_list; - DIR *dir; - struct dirent *buf = new struct dirent(); - struct dirent *entry; - dir = opendir(kSysfsPath); - if (!dir) - logprintf(0, "Process Error: Cannot open %s", kSysfsPath); - while (readdir_r(dir, buf, &entry) == 0 && entry) { - PCIDevice *device; - unsigned int dev, func; - // ".", ".." or a special non-device perhaps. 
- if (entry->d_name[0] == '.') - continue; - - device = new PCIDevice(); - if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d", - &device->domain, &device->bus, &dev, &func) < 4) { - logprintf(0, "Process Error: Couldn't parse %s", entry->d_name); - free(device); - continue; - } - device->dev = dev; - device->func = func; - device->vendor_id = PCIGetValue(entry->d_name, "vendor"); - device->device_id = PCIGetValue(entry->d_name, "device"); - PCIGetResources(entry->d_name, device); - device_list.insert(device_list.end(), device); - } - closedir(dir); - delete buf; - return device_list; -} - -int OsLayer::PCIGetValue(string name, string object) { - int fd, len; - char filename[256]; - char buf[256]; - snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, - name.c_str(), object.c_str()); - fd = open(filename, O_RDONLY); - if (fd < 0) - return 0; - len = read(fd, buf, 256); - close(fd); - buf[len] = '\0'; - return strtol(buf, NULL, 0); // NOLINT -} - -int OsLayer::PCIGetResources(string name, PCIDevice *device) { - char filename[256]; - char buf[256]; - FILE *file; - int64 start; - int64 end; - int64 size; - int i; - snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, - name.c_str(), "resource"); - file = fopen(filename, "r"); - if (!file) { - logprintf(0, "Process Error: impossible to find resource file for %s", - filename); - return errno; - } - for (i = 0; i < 6; i++) { - if (!fgets(buf, 256, file)) - break; - sscanf(buf, "%llx %llx", &start, &end); // NOLINT - size = 0; - if (start) - size = end - start + 1; - device->base_addr[i] = start; - device->size[i] = size; - } - fclose(file); - return 0; -} diff --git a/src/os.h b/src/os.h index a928577..13660d8 100644 --- a/src/os.h +++ b/src/os.h @@ -17,6 +17,8 @@ #define STRESSAPPTEST_OS_H_ #include +#include + #include #include #include @@ -26,9 +28,9 @@ // so these includes are correct. 
#include "adler32memcpy.h" // NOLINT #include "sattypes.h" // NOLINT +#include "clock.h" // NOLINT const char kPagemapPath[] = "/proc/self/pagemap"; -const char kSysfsPath[] = "/sys/bus/pci/devices"; struct PCIDevice { int32 domain; @@ -45,6 +47,8 @@ typedef vector PCIDevices; class ErrorDiag; +class Clock; + // This class implements OS/Platform specific funtions. class OsLayer { public: @@ -57,6 +61,13 @@ class OsLayer { min_hugepages_bytes_ = min_bytes; } + // Set the minimum amount of memory that should not be allocated. This only + // has any effect if hugepages are not used. + // Must be set before Initialize(). + void SetReserveSize(int64 reserve_mb) { + reserve_mb_ = reserve_mb; + } + // Set parameters needed to translate physical address to memory module. void SetDramMappingParams(uintptr_t channel_hash, int channel_width, vector< vector > *channels) { @@ -77,13 +88,11 @@ class OsLayer { // Prints failed dimm. This implementation is optional for // subclasses to implement. // Takes a bus address and string, and prints the DIMM name - // into the string. Returns error status. + // into the string. Returns the DIMM number that corresponds to the + // address given, or -1 if unable to identify the DIMM number. + // Note that subclass implementations of FindDimm() MUST fill + // buf with at LEAST one non-whitespace character (provided len > 0). virtual int FindDimm(uint64 addr, char *buf, int len); - // Print dimm info, plus more available info. - virtual int FindDimmExtended(uint64 addr, char *buf, int len) { - return FindDimm(addr, buf, len); - } - // Classifies addresses according to "regions" // This may mean different things on different platforms. @@ -141,10 +150,95 @@ class OsLayer { // instruction. For example, software can use an MFENCE instruction to // insure that previous stores are included in the write-back. 
asm volatile("mfence"); - asm volatile("clflush (%0)" :: "r" (vaddr)); + asm volatile("clflush (%0)" : : "r" (vaddr)); + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: Using syscall to cache flush." + // ARMv7a cachelines are 8 words (32 bytes). + syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast(vaddr) + 32, 0); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Takes a NULL-terminated + // array of addresses to flush. + inline static void FastFlushList(void **vaddrs) { +#ifdef STRESSAPPTEST_CPU_PPC + while (*vaddrs) { + asm volatile("dcbf 0,%0" : : "r" (*vaddrs++)); + } + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("mfence"); + while (*vaddrs) { + asm volatile("clflush (%0)" : : "r" (*vaddrs++)); + } + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + while (*vaddrs) { + FastFlush(*vaddrs++); + } +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush hint, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. 
Note that this + // will not guarantee that a flush happens, but will at least + // hint that it should. This is useful for speeding up + // parallel march algorithms. + inline static void FastFlushHint(void *vaddr) { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("dcbf 0,%0" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("clflush (%0)" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + FastFlush(vaddr); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Sync's any + // transactions for ordering FastFlushHints. + inline static void FastFlushSync() { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. asm volatile("mfence"); #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes." 
+ // This is a NOP, FastFlushHint() always does a full flush, so there's + // nothing to do for FastFlushSync(). #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -239,9 +333,6 @@ class OsLayer { // Handle to platform-specific error diagnoser. ErrorDiag *error_diagnoser_; - // Detect all PCI Devices. - virtual PCIDevices GetPCIDevices(); - // Disambiguate between different "warm" memcopies. virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, @@ -258,16 +349,27 @@ class OsLayer { } ErrCallback get_err_log_callback() { return err_log_callback_; } + // Set a clock object that can be overridden for use with unit tests. + void SetClock(Clock *clock) { + if (clock_) { + delete clock_; + } + clock_ = clock; + time_initialized_ = clock_->Now(); + } + protected: void *testmem_; // Location of test memory. uint64 testmemsize_; // Size of test memory. int64 totalmemsize_; // Size of available memory. int64 min_hugepages_bytes_; // Minimum hugepages size. + int64 reserve_mb_; // Minimum amount of memory to reserve in MB. bool error_injection_; // Do error injection? bool normal_mem_; // Memory DMA capable? bool use_hugepages_; // Use hugepage shmem? bool use_posix_shm_; // Use 4k page shmem? bool dynamic_mapped_shmem_; // Conserve virtual address space. + bool mmapped_allocation_; // Was memory allocated using mmap()? int shmid_; // Handle to shmem vector< vector > *channels_; // Memory module names per channel. uint64 channel_hash_; // Mask of address bits XORed for channel. @@ -291,9 +393,6 @@ class OsLayer { // Get file descriptor for dev msr. virtual int OpenMSR(uint32 core, uint32 address); - // Auxiliary methods for PCI device configuration - int PCIGetValue(string name, string object); - int PCIGetResources(string name, PCIDevice *device); // Look up how many hugepages there are. virtual int64 FindHugePages(); @@ -301,6 +400,9 @@ class OsLayer { // Link to find last transaction at an error location. 
ErrCallback err_log_callback_; + // Object to wrap the time function. + Clock *clock_; + private: DISALLOW_COPY_AND_ASSIGN(OsLayer); }; diff --git a/src/sat.cc b/src/sat.cc index 4f4e684..57fd4fe 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -125,6 +125,26 @@ bool Sat::CheckEnvironment() { #error Build system regression - COPTS disregarded. #endif + // Check if the cpu frequency test is enabled and able to run. + if (cpu_freq_test_) { + if (!CpuFreqThread::CanRun()) { + logprintf(0, "Process Error: This platform does not support this " + "test.\n"); + bad_status(); + return false; + } else if (cpu_freq_threshold_ <= 0) { + logprintf(0, "Process Error: The cpu frequency test requires " + "--cpu_freq_threshold set to a value > 0\n"); + bad_status(); + return false; + } else if (cpu_freq_round_ < 0) { + logprintf(0, "Process Error: The --cpu_freq_round option must be greater" + " than or equal to zero. A value of zero means no rounding.\n"); + bad_status(); + return false; + } + } + // Use all CPUs if nothing is specified. if (memory_threads_ == -1) { memory_threads_ = os_->num_cpus(); @@ -491,12 +511,6 @@ bool Sat::InitializePages() { if (GetValid(&pe, kInvalidTag)) { int64 paddr = os_->VirtualToPhysical(pe.addr); int32 region = os_->FindRegion(paddr); - - if (i < 256) { - char buf[256]; - os_->FindDimm(paddr, buf, sizeof(buf)); - logprintf(12, "Log: address: %#llx, %s\n", paddr, buf); - } region_[region]++; pe.paddr = paddr; pe.tag = 1 << region; @@ -554,6 +568,7 @@ bool Sat::Initialize() { // Initializes sync'd log file to ensure output is saved. 
if (!InitializeLogfile()) return false; + Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_); Logger::GlobalLogger()->StartThread(); logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str()); @@ -572,6 +587,10 @@ bool Sat::Initialize() { if (min_hugepages_mbytes_ > 0) os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); + + if (reserve_mb_ > 0) + os_->SetReserveSize(reserve_mb_); + if (channels_.size() > 0) { logprintf(6, "Log: Decoding memory: %dx%d bit channels," "%d modules per channel (x%d), decoding hash 0x%x\n", @@ -647,6 +666,7 @@ Sat::Sat() { pages_ = 0; size_mb_ = 0; size_ = size_mb_ * kMegabyte; + reserve_mb_ = 0; min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; @@ -661,6 +681,7 @@ Sat::Sat() { run_on_anything_ = 0; use_logfile_ = 0; logfile_ = 0; + log_timestamps_ = true; // Detect 32/64 bit binary. void *pvoid = 0; address_mode_ = sizeof(pvoid) * 8; @@ -678,9 +699,15 @@ Sat::Sat() { // Cache coherency data initialization. cc_test_ = false; // Flag to trigger cc threads. cc_cacheline_count_ = 2; // Two datastructures of cache line size. + cc_cacheline_size_ = 0; // Size of a cacheline (0 for auto-detect). cc_inc_count_ = 1000; // Number of times to increment the shared variable. cc_cacheline_data_ = 0; // Cache Line size datastructure. + // Cpu frequency data initialization. + cpu_freq_test_ = false; // Flag to trigger cpu frequency thread. + cpu_freq_threshold_ = 0; // Threshold, in MHz, at which a cpu fails. + cpu_freq_round_ = 10; // Round the computed frequency to this value. + sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL)); file_threads_ = 0; net_threads_ = 0; @@ -774,6 +801,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of megabyte to use. ARG_IVALUE("-M", size_mb_); + // Specify the amount of megabytes to be reserved for system. + ARG_IVALUE("--reserve_memory", reserve_mb_); + // Set minimum megabytes of hugepages to require. 
ARG_IVALUE("-H", min_hugepages_mbytes_); @@ -795,8 +825,21 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of cache line size datastructures ARG_IVALUE("--cc_line_count", cc_cacheline_count_); + // Override the detected or assumed cache line size. + ARG_IVALUE("--cc_line_size", cc_cacheline_size_); + // Flag set when cache coherency tests need to be run - ARG_KVALUE("--cc_test", cc_test_, 1); + ARG_KVALUE("--cc_test", cc_test_, true); + + // Set when the cpu_frequency test needs to be run + ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true); + + // Set the threshold in MHz at which the cpu frequency test will fail. + ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_); + + // Set the rounding value for the cpu frequency test. The default is to + // round to the nearest 10s value. + ARG_IVALUE("--cpu_freq_round", cpu_freq_round_); // Set number of CPU stress threads. ARG_IVALUE("-C", cpu_stress_threads_); @@ -807,6 +850,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Verbosity level. ARG_IVALUE("-v", verbosity_); + // Turn off timestamps logging. + ARG_KVALUE("--no_timestamps", log_timestamps_, false); + // Set maximum number of errors to collect. Stop running after this many. 
ARG_IVALUE("--max_errors", max_errorcount_); @@ -1004,7 +1050,7 @@ bool Sat::ParseArgs(int argc, char **argv) { for (uint i = 0; i < channels_.size(); i++) if (channels_[i].size() != channels_[0].size()) { logprintf(6, "Process Error: " - "Channels 0 and %d have a different count of dram modules.\n",i); + "Channels 0 and %d have a different count of dram modules.\n", i); bad_status(); return false; } @@ -1043,6 +1089,8 @@ bool Sat::ParseArgs(int argc, char **argv) { void Sat::PrintHelp() { printf("Usage: ./sat(32|64) [options]\n" " -M mbytes megabytes of ram to test\n" + " --reserve_memory If not using hugepages, the amount of memory to " + " reserve for the system\n" " -H mbytes minimum megabytes of hugepages to require\n" " -s seconds number of seconds to run\n" " -m threads number of memory copy threads to run\n" @@ -1054,6 +1102,7 @@ void Sat::PrintHelp() { " -f filename add a disk thread with " "tempfile 'filename'\n" " -l logfile log output to file 'logfile'\n" + " --no_timestamps do not prefix timestamps to log messages\n" " --max_errors n exit early after finding 'n' errors\n" " -v level verbosity (0-20), default is 8\n" " -W Use more CPU-stressful memory copy\n" @@ -1091,6 +1140,13 @@ void Sat::PrintHelp() { "cacheline's member\n" " --cc_line_count number of cache line sized datastructures " "to allocate for the cache coherency threads to operate\n" + " --cc_line_size override the auto-detected cache line size\n" + " --cpu_freq_test enable the cpu frequency test (requires the " + "--cpu_freq_threshold argument to be set)\n" + " --cpu_freq_threshold fail the cpu frequency test if the frequency " + "goes below this value (specified in MHz)\n" + " --cpu_freq_round round the computed frequency to this value, if set" + " to zero, only round to the nearest MHz\n" " --paddr_base allocate memory starting from this address\n" " --pause_delay delay (in seconds) between power spikes\n" " --pause_duration duration (in seconds) of each pause\n" @@ -1098,12 +1154,12 @@ 
void Sat::PrintHelp() { "each CPU to be tested by that CPU\n" " --remote_numa choose memory regions not associated with " "each CPU to be tested by that CPU\n" - " --channel_hash mask of address bits XORed to determine channel.\n" - " Mask 0x40 interleaves cachelines between channels\n" + " --channel_hash mask of address bits XORed to determine channel. " + "Mask 0x40 interleaves cachelines between channels\n" " --channel_width bits width in bits of each memory channel\n" - " --memory_channel u1,u2 defines a comma-separated list of names\n" - " for dram packages in a memory channel.\n" - " Use multiple times to define multiple channels.\n"); + " --memory_channel u1,u2 defines a comma-separated list of names " + "for dram packages in a memory channel. Use multiple times to " + "define multiple channels.\n"); } bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) { @@ -1348,32 +1404,45 @@ void Sat::InitializeThreads() { sizeof(cc_cacheline_data) * cc_cacheline_count_); int num_cpus = CpuCount(); + char *num; + // Calculate the number of cache lines needed just to give each core + // its own counter. + int line_size = cc_cacheline_size_; + if (line_size <= 0) { + line_size = CacheLineSize(); + if (line_size < kCacheLineSize) + line_size = kCacheLineSize; + logprintf(12, "Log: Using %d as cache line size\n", line_size); + } + // The number of cache lines needed to hold an array of num_cpus. + // "num" must be the same type as cc_cacheline_data[X].num or the memory + // size calculations will fail. + int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size; // Allocate all the nums once so that we get a single chunk // of contiguous memory. 
- int *num; #ifdef HAVE_POSIX_MEMALIGN int err_result = posix_memalign( reinterpret_cast(&num), - kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_); + line_size, line_size * needed_lines * cc_cacheline_count_); #else - num = reinterpret_cast(memalign(kCacheLineSize, - sizeof(*num) * num_cpus * cc_cacheline_count_)); + num = reinterpret_cast(memalign( + line_size, line_size * needed_lines * cc_cacheline_count_)); int err_result = (num == 0); #endif sat_assert(err_result == 0); int cline; for (cline = 0; cline < cc_cacheline_count_; cline++) { - memset(num, 0, sizeof(num_cpus) * num_cpus); + memset(num, 0, sizeof(*num) * num_cpus); cc_cacheline_data_[cline].num = num; - num += num_cpus; + num += (line_size * needed_lines) / sizeof(*num); } int tnum; for (tnum = 0; tnum < num_cpus; tnum++) { CpuCacheCoherencyThread *thread = new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_, - tnum, cc_inc_count_); + tnum, num_cpus, cc_inc_count_); thread->InitThread(total_threads_++, this, os_, patternlist_, &continuous_status_); // Pin the thread to a particular core. @@ -1384,6 +1453,22 @@ void Sat::InitializeThreads() { } workers_map_.insert(make_pair(kCCType, cc_vector)); } + + if (cpu_freq_test_) { + // Create the frequency test thread. + logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n", + cpu_freq_threshold_); + CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_, + cpu_freq_round_); + // This thread should be paused when other threads are paused. 
+ thread->InitThread(total_threads_++, this, os_, NULL, + &power_spike_status_); + + WorkerVector *cpu_freq_vector = new WorkerVector(); + cpu_freq_vector->insert(cpu_freq_vector->end(), thread); + workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector)); + } + ReleaseWorkerLock(); } @@ -1392,6 +1477,19 @@ int Sat::CpuCount() { return sysconf(_SC_NPROCESSORS_CONF); } +// Return the worst case (largest) cache line size of the various levels of +// cache actually present in the machine. +int Sat::CacheLineSize() { + int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + return max_linesize; +} + // Notify and reap worker threads. void Sat::JoinThreads() { logprintf(12, "Log: Joining worker threads\n"); @@ -1974,3 +2072,9 @@ void logprintf(int priority, const char *format, ...) { Logger::GlobalLogger()->VLogF(priority, format, args); va_end(args); } + +// Stop the logging thread and verify any pending data is written to the log. +void logstop() { + Logger::GlobalLogger()->StopThread(); +} + diff --git a/src/sat.h b/src/sat.h index 93d6b34..92396d8 100644 --- a/src/sat.h +++ b/src/sat.h @@ -134,6 +134,8 @@ class Sat { // Return the number of cpus in the system. int CpuCount(); + // Return the worst-case (largest) cache line size of the system. + int CacheLineSize(); // Collect error counts from threads. int64 GetTotalErrorCount(); @@ -147,13 +149,15 @@ class Sat { int64 pages_; // Number of memory blocks. int64 size_; // Size of memory tested, in bytes. int64 size_mb_; // Size of memory tested, in MB. + int64 reserve_mb_; // Reserve at least this amount of memory + // for the system, in MB. int64 min_hugepages_mbytes_; // Minimum hugepages size. 
int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. - vector< vector > channels_; // Memory module names per channel. uint64 channel_hash_; // Mask of address bits XORed for channel. int channel_width_; // Channel width in bits. + vector< vector > channels_; // Memory module names per channel. // Control flags. volatile sig_atomic_t user_break_; // User has signalled early exit. Used as @@ -172,6 +176,7 @@ class Sat { int use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. int logfile_; // File handle to log to. + bool log_timestamps_; // Whether to add timestamps to log lines. // Disk thread options. int read_block_size_; // Size of block to read from disk. @@ -202,9 +207,18 @@ class Sat { bool cc_test_; // Flag to decide whether to start the // cache coherency threads. int cc_cacheline_count_; // Number of cache line size structures. + int cc_cacheline_size_; // Size of a cache line. int cc_inc_count_; // Number of times to increment the shared // cache lines structure members. + // Cpu Frequency Options. + bool cpu_freq_test_; // Flag to decide whether to start the + // cpu frequency thread. + int cpu_freq_threshold_; // The MHz threshold which will cause + // the test to fail. + int cpu_freq_round_; // Round the computed frequency to this + // value. + // Thread control. int file_threads_; // Threads of file IO. int net_threads_; // Threads of network IO. @@ -252,7 +266,8 @@ class Sat { kRandomDiskType = 7, kCPUType = 8, kErrorType = 9, - kCCType = 10 + kCCType = 10, + kCPUFreqType = 11, }; // Helper functions. 
diff --git a/src/sattypes.h b/src/sattypes.h index c9341d0..e51db31 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -27,11 +27,11 @@ #ifdef HAVE_CONFIG_H // Built using autoconf #ifdef __ANDROID__ -#include "stressapptest_config_android.h" +#include "stressapptest_config_android.h" // NOLINT #else -#include "stressapptest_config.h" -using namespace __gnu_cxx; -#endif +#include "stressapptest_config.h" // NOLINT +using namespace __gnu_cxx; //NOLINT +#endif // __ANDROID__ using namespace std; typedef signed long long int64; @@ -57,10 +57,10 @@ inline const char* BuildChangelist() { } static const bool kOpenSource = true; -#else +#else // !HAVE_CONFIG_H static const bool kOpenSource = false; - #include "googlesattypes.h" -#endif + #include "googlesattypes.h" // NOLINT +#endif // HAVE_CONFIG_H // Workaround to allow 32/64 bit conversion // without running into strict aliasing problems. union datacast_t { @@ -75,11 +75,15 @@ union datacast_t { // File sync'd print to console and log void logprintf(int priority, const char *format, ...); +// Stop the log and dump any queued lines. +void logstop(); + // We print to stderr ourselves first in case we're in such a bad state that the // logger can't work. #define sat_assert(x) \ {\ if (!(x)) {\ + logstop();\ fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ exit(1);\ @@ -186,6 +190,46 @@ inline string ErrorString(int error_num) { #endif } +// Execute the cpuid instruction and pass back the contents of the registers. +// This only works on x86 based platforms. 
+inline void cpuid( + unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { + *ebx = 0; + *ecx = 0; + *edx = 0; + // CPUID features documented at: + // http://www.sandpile.org/ia32/cpuid.htm +#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64) +#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) + // In PIC compilations using the i686 cpu type, ebx contains the address + // of the global offset table. The compiler can't properly handle constraints + // using the ebx register for this compile, so preserve the register + // ourselves. + asm( + "mov %%ebx, %%edi;" + "cpuid;" + "xchg %%edi, %%ebx;" + // Output registers. + : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#else + asm( + "cpuid;" + // Output registers. + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) +#elif defined(STRESSAPPTEST_CPU_PPC) + return; +#else +#warning "Unsupported CPU type." +#endif +} + // Define handy constants here static const int kTicksPerSec = 100; static const int kMegabyte = (1024LL*1024LL); diff --git a/src/worker.cc b/src/worker.cc index d24b5cd..dcffd4e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -78,31 +78,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid, #endif namespace { - // Get HW core ID from cpuid instruction. - inline int apicid(void) { - int cpu; -#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - __asm__ __volatile__ ( -# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) - "xchg %%ebx, %%esi;" - "cpuid;" - "xchg %%esi, %%ebx;" - : "=S" (cpu) -# else - "cpuid;" - : "=b" (cpu) -# endif - : "a" (1) : "cx", "dx"); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: unable to determine core ID." - cpu = 0; -#else - #warning "Unsupported CPU type: unable to determine core ID." 
- cpu = 0; -#endif - return (cpu >> 24); - } - // Work around the sad fact that there are two (gnu, xsi) incompatible // versions of strerror_r floating around google. Awesome. bool sat_strerror(int err, char *buf, int len) { @@ -124,7 +99,7 @@ namespace { inline uint64 addr_to_tag(void *address) { return reinterpret_cast(address); } -} +} // namespace #if !defined(O_DIRECT) // Sometimes this isn't available. @@ -183,10 +158,13 @@ void WorkerStatus::StopWorkers() { WaitOnPauseBarrier(); } -bool WorkerStatus::ContinueRunning() { +bool WorkerStatus::ContinueRunning(bool *paused) { // This loop is an optimization. We use it to immediately re-check the status // after resuming from a pause, instead of returning and waiting for the next // call to this function. + if (paused) { + *paused = false; + } for (;;) { switch (GetStatus()) { case RUN: @@ -197,6 +175,10 @@ bool WorkerStatus::ContinueRunning() { WaitOnPauseBarrier(); // Wait for ResumeWorkers() to be called. WaitOnPauseBarrier(); + // Indicate that a pause occurred. + if (paused) { + *paused = true; + } break; case STOP: return false; @@ -325,8 +307,8 @@ bool WorkerThread::InitPriority() { logprintf(11, "Log: Bind to %s failed.\n", cpuset_format(&cpu_mask_).c_str()); - logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n", - thread_num_, apicid(), + logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n", + thread_num_, sched_getcpu(), CurrentCpusFormat().c_str(), cpuset_format(&cpu_mask_).c_str()); #if 0 @@ -590,7 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, const char *message) { char dimm_string[256] = ""; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. 
os_->Flush(error->vaddr); @@ -625,7 +607,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -825,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr, if ((state == kGoodAgain) || (state == kBad)) { unsigned int blockerrors = badend - badstart + 1; errormessage = "Block Error"; + // It's okay for the 1st entry to be corrected multiple times, + // it will simply be reported twice. Once here and once below + // when processing the error queue. ProcessError(&recorded[0], 0, errormessage.c_str()); logprintf(0, "Block Error: (%p) pattern %s instead of %s, " "%d bytes from offset 0x%x to 0x%x\n", @@ -833,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr, blockerrors * wordsize_, offset + badstart * wordsize_, offset + badend * wordsize_); - errorcount_ += blockerrors; - return blockerrors; } } } @@ -850,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr, if (page_error) { // For each word in the data region. - int error_recount = 0; for (int i = 0; i < length / wordsize_; i++) { uint64 actual = memblock[i]; uint64 expected; @@ -869,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr, // If the value is incorrect, save an error record for later printing. if (actual != expected) { - if (error_recount < kErrorLimit) { - // We already reported these. - error_recount++; - } else { - // If we have overflowed the error queue, print the errors now. - struct ErrorRecord er; - er.actual = actual; - er.expected = expected; - er.vaddr = &memblock[i]; - - // Do the error printout. This will take a long time and - // likely change the machine state. - ProcessError(&er, 12, errormessage.c_str()); - overflowerrors++; - } + // If we have overflowed the error queue, print the errors now. 
+ struct ErrorRecord er; + er.actual = actual; + er.expected = expected; + er.vaddr = &memblock[i]; + + // Do the error printout. This will take a long time and + // likely change the machine state. + ProcessError(&er, 12, errormessage.c_str()); + overflowerrors++; } } } @@ -958,7 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, char tag_dimm_string[256] = ""; bool read_error = false; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -992,7 +969,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, error->tagvaddr, error->tagpaddr, tag_dimm_string, read_error ? "read error" : "write error", - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -1110,12 +1087,18 @@ bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64, AdlerChecksum ignored_checksum; os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum); - // Force cache flush. - int length = size_in_bytes / sizeof(*dstmem64); - for (int i = 0; i < length; i += sizeof(*dstmem64)) { - os_->FastFlush(dstmem64 + i); - os_->FastFlush(srcmem64 + i); + // Force cache flush of both the source and destination addresses. + // length - length of block to flush in cachelines. + // mem_increment - number of dstmem/srcmem values per cacheline. + int length = size_in_bytes / kCacheLineSize; + int mem_increment = kCacheLineSize / sizeof(*dstmem64); + OsLayer::FastFlushSync(); + for (int i = 0; i < length; ++i) { + OsLayer::FastFlushHint(dstmem64 + (i * mem_increment)); + OsLayer::FastFlushHint(srcmem64 + (i * mem_increment)); } + OsLayer::FastFlushSync(); + // Check results. AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe); // Patch up address tags. 
@@ -1246,11 +1229,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1390,11 +1373,11 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1610,12 +1593,11 @@ void FileThread::SetFile(const char *filename_init) { // Open the file for access. 
bool FileThread::OpenFile(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_CREAT | O_SYNC; int fd = open(filename_.c_str(), flags | O_DIRECT, 0644); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL } if (fd < 0) { logprintf(0, "Process Error: Failed to create file %s!!\n", @@ -1623,8 +1605,6 @@ bool FileThread::OpenFile(int *pfile) { pages_copied_ = 0; return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL *pfile = fd; return true; } @@ -1695,7 +1675,7 @@ bool FileThread::WritePages(int fd) { if (!result) return false; } - return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. + return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. } // Copy data from file into memory block. @@ -2475,13 +2455,22 @@ bool CpuStressThread::Work() { CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, int cacheline_count, int thread_num, + int thread_count, int inc_count) { cc_cacheline_data_ = data; cc_cacheline_count_ = cacheline_count; cc_thread_num_ = thread_num; + cc_thread_count_ = thread_count; cc_inc_count_ = inc_count; } +// A very simple psuedorandom generator. Since the random number is based +// on only a few simple logic operations, it can be done quickly in registers +// and the compiler can inline it. +uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) { + return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial); +} + // Worked thread to test the cache coherency of the CPUs // Return false on fatal sw error. 
bool CpuCacheCoherencyThread::Work() { @@ -2490,7 +2479,19 @@ bool CpuCacheCoherencyThread::Work() { uint64 time_start, time_end; struct timeval tv; + // Use a slightly more robust random number for the initial + // value, so the random sequences from the simple generator will + // be more divergent. +#ifdef HAVE_RAND_R unsigned int seed = static_cast(gettid()); + uint64 r = static_cast(rand_r(&seed)); + r |= static_cast(rand_r(&seed)) << 32; +#else + srand(time(NULL)); + uint64 r = static_cast(rand()); // NOLINT + r |= static_cast(rand()) << 32; // NOLINT +#endif + gettimeofday(&tv, NULL); // Get the timestamp before increments. time_start = tv.tv_sec * 1000000ULL + tv.tv_usec; @@ -2500,14 +2501,19 @@ bool CpuCacheCoherencyThread::Work() { // Choose a datastructure in random and increment the appropriate // member in that according to the offset (which is the same as the // thread number. -#ifdef HAVE_RAND_R - int r = rand_r(&seed); -#else - int r = rand(); -#endif - r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0)); + r = SimpleRandom(r); + int cline_num = r % cc_cacheline_count_; + int offset; + // Reverse the order for odd numbered threads in odd numbered cache + // lines. This is designed for massively multi-core systems where the + // number of cores exceeds the bytes in a cache line, so "distant" cores + // get a chance to exercize cache coherency between them. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; // Increment the member of the randomely selected structure. - (cc_cacheline_data_[r].num[cc_thread_num_])++; + (cc_cacheline_data_[cline_num].num[offset])++; } total_inc += cc_inc_count_; @@ -2516,14 +2522,26 @@ bool CpuCacheCoherencyThread::Work() { // in all the cache line structures for this particular thread. 
int cc_global_num = 0; for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) { - cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_]; + int offset; + // Perform the same offset calculation from above. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; + cc_global_num += cc_cacheline_data_[cline_num].num[offset]; // Reset the cachline member's value for the next run. - cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0; + cc_cacheline_data_[cline_num].num[offset] = 0; } if (sat_->error_injection()) cc_global_num = -1; - if (cc_global_num != cc_inc_count_) { + // Since the count is only stored in a byte, to squeeze more into a + // single cache line, only compare it as a byte. In the event that there + // is something detected, the chance that it would be missed by a single + // thread is 1 in 256. If it affects all cores, that makes the chance + // of it being missed terribly minute. It seems unlikely any failure + // case would be off by more than a small number. + if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) { errorcount_++; logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n", cc_global_num, cc_inc_count_); @@ -2707,20 +2725,17 @@ bool DiskThread::SetParameters(int read_block_size, // Open a device, return false on failure. 
bool DiskThread::OpenDevice(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_SYNC | O_LARGEFILE; int fd = open(device_name_.c_str(), flags | O_DIRECT, 0); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + os_->ActivateFlushPageCache(); } if (fd < 0) { logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", device_name_.c_str(), thread_num_); return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); *pfile = fd; return GetDiskSize(fd); @@ -2876,11 +2891,11 @@ bool DiskThread::DoWork(int fd) { // Block is either initialized by writing, or in nondestructive case, // initialized by being added into the datastructure for later reading. - block->SetBlockAsInitialized(); + block->initialized(); in_flight_sectors_.push(block); } - if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. + if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. return false; // Verify blocks on disk. @@ -2989,8 +3004,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, errorcount_++; os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); - if (event.res < 0) { - switch (event.res) { + int64 result = static_cast(event.res); + if (result < 0) { + switch (result) { case -EIO: logprintf(0, "Hardware Error: Low-level I/O error while doing %s to " "sectors starting at %lld on disk %s (thread %d).\n", @@ -3013,7 +3029,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, } return true; -#else // !HAVE_LIBAIO_H +#else // !HAVE_LIBAIO_H return false; #endif } @@ -3021,7 +3037,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // Write a block to disk. // Return false if the block is not written. 
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { - memset(block_buffer_, 0, block->GetSize()); + memset(block_buffer_, 0, block->size()); // Fill block buffer with a pattern struct page_entry pe; @@ -3029,30 +3045,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Even though a valid page could not be obatined, it is not an error // since we can always fill in a pattern directly, albeit slower. unsigned int *memblock = static_cast(block_buffer_); - block->SetPattern(patternlist_->GetRandomPattern()); + block->set_pattern(patternlist_->GetRandomPattern()); logprintf(11, "Log: Warning, using pattern fill fallback in " "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n", device_name_.c_str(), thread_num_); - for (int i = 0; i < block->GetSize()/wordsize_; i++) { - memblock[i] = block->GetPattern()->pattern(i); + for (unsigned int i = 0; i < block->size()/wordsize_; i++) { + memblock[i] = block->pattern()->pattern(i); } } else { - memcpy(block_buffer_, pe.addr, block->GetSize()); - block->SetPattern(pe.pattern); + memcpy(block_buffer_, pe.addr, block->size()); + block->set_pattern(pe.pattern); sat_->PutValid(&pe); } logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s" " (thread %d).\n", - block->GetSize()/kSectorSize, block->GetAddress(), + block->size()/kSectorSize, block->address(), device_name_.c_str(), thread_num_); int64 start_time = GetTime(); - if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(), - block->GetAddress() * kSectorSize, write_timeout_)) { + if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(), + block->address() * kSectorSize, write_timeout_)) { return false; } @@ -3073,11 +3089,11 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Return true if the block was read, also increment errorcount // if the block had data errors or performance problems. 
bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { - int64 blocks = block->GetSize() / read_block_size_; + int64 blocks = block->size() / read_block_size_; int64 bytes_read = 0; int64 current_blocks; int64 current_bytes; - uint64 address = block->GetAddress(); + uint64 address = block->address(); logprintf(20, "Log: Reading sectors starting at %lld on disk %s " "(thread %d).\n", @@ -3129,7 +3145,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // In non-destructive mode, don't compare the block to the pattern since // the block was never written to disk in the first place. if (!non_destructive_) { - if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes, + if (CheckRegion(block_buffer_, block->pattern(), current_bytes, 0, bytes_read)) { os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); errorcount_ += 1; @@ -3166,7 +3182,7 @@ bool DiskThread::Work() { // when using direct IO. #ifdef HAVE_POSIX_MEMALIGN int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, - sat_->page_length()); + sat_->page_length()); #else block_buffer_ = memalign(kBufferAlignment, sat_->page_length()); int memalign_result = (block_buffer_ == 0); @@ -3410,3 +3426,224 @@ bool MemoryRegionThread::Work() { "pages checked\n", thread_num_, status_, pages_copied_); return result; } + +// The list of MSRs to read from each cpu. +const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = { + { kMsrTscAddr, "TSC" }, + { kMsrAperfAddr, "APERF" }, + { kMsrMperfAddr, "MPERF" }, +}; + +CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round) + : num_cpus_(num_cpus), + freq_threshold_(freq_threshold), + round_(round) { + sat_assert(round >= 0); + if (round == 0) { + // If rounding is off, force rounding to the nearest MHz. 
+ round_ = 1; + round_value_ = 0.5; + } else { + round_value_ = round/2.0; + } +} + +CpuFreqThread::~CpuFreqThread() { +} + +// Compute the difference between the currently read MSR values and the +// previously read values and store the results in delta. If any of the +// values did not increase, or the TSC value is too small, returns false. +// Otherwise, returns true. +bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta) { + // Loop through the msrs. + for (int msr = 0; msr < kMsrLast; msr++) { + if (previous->msrs[msr] > current->msrs[msr]) { + logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx " + "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr], + current->msrs[msr]); + return false; + } else { + delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr]; + } + } + + // Check for TSC < 1 Mcycles over interval. + if (delta->msrs[kMsrTsc] < (1000 * 1000)) { + logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n"); + return false; + } + timersub(¤t->tv, &previous->tv, &delta->tv); + + return true; +} + +// Compute the change in values of the MSRs between current and previous, +// set the frequency in MHz of the cpu. If there is an error computing +// the delta, return false. Othewise, return true. +bool CpuFreqThread::ComputeFrequency(CpuDataType *current, + CpuDataType *previous, int *freq) { + CpuDataType delta; + if (!ComputeDelta(current, previous, &delta)) { + return false; + } + + double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0; + double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000 + * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval; + + // Use the rounding value to round up properly. + int computed = static_cast(frequency + round_value_); + *freq = computed - (computed % round_); + return true; +} + +// This is the task function that the thread executes. 
+bool CpuFreqThread::Work() { + cpu_set_t cpuset; + if (!AvailableCpus(&cpuset)) { + logprintf(0, "Process Error: Cannot get information about the cpus.\n"); + return false; + } + + // Start off indicating the test is passing. + status_ = true; + + int curr = 0; + int prev = 1; + uint32 num_intervals = 0; + bool paused = false; + bool valid; + bool pass = true; + + vector data[2]; + data[0].resize(num_cpus_); + data[1].resize(num_cpus_); + while (IsReadyToRun(&paused)) { + if (paused) { + // Reset the intervals and restart logic after the pause. + num_intervals = 0; + } + if (num_intervals == 0) { + // If this is the first interval, then always wait a bit before + // starting to collect data. + sat_sleep(kStartupDelay); + } + + // Get the per cpu counters. + valid = true; + for (int cpu = 0; cpu < num_cpus_; cpu++) { + if (CPU_ISSET(cpu, &cpuset)) { + if (!GetMsrs(cpu, &data[curr][cpu])) { + logprintf(0, "Failed to get msrs on cpu %d.\n", cpu); + valid = false; + break; + } + } + } + if (!valid) { + // Reset the number of collected intervals since something bad happened. + num_intervals = 0; + continue; + } + + num_intervals++; + + // Only compute a delta when we have at least two intervals worth of data. + if (num_intervals > 2) { + for (int cpu = 0; cpu < num_cpus_; cpu++) { + if (CPU_ISSET(cpu, &cpuset)) { + int freq; + if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu], + &freq)) { + // Reset the number of collected intervals since an unknown + // error occurred. + logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu); + num_intervals = 0; + break; + } + logprintf(15, "Cpu %d Freq %d\n", cpu, freq); + if (freq < freq_threshold_) { + errorcount_++; + pass = false; + logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz " + "threshold %d MHz.\n", cpu, freq, freq_threshold_); + } + } + } + } + + sat_sleep(kIntervalPause); + + // Swap the values in curr and prev (these values flip between 0 and 1). 
+ curr ^= 1; + prev ^= 1; + } + + return pass; +} + + +// Get the MSR values for this particular cpu and save them in data. If +// any error is encountered, returns false. Otherwise, returns true. +bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) { + for (int msr = 0; msr < kMsrLast; msr++) { + if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) { + return false; + } + } + // Save the time at which we acquired these values. + gettimeofday(&data->tv, NULL); + + return true; +} + +// Returns true if this test can run on the current machine. Otherwise, +// returns false. +bool CpuFreqThread::CanRun() { +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + unsigned int eax, ebx, ecx, edx; + + // Check that the TSC feature is supported. + // This check is valid for both Intel and AMD. + eax = 1; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(edx & (1 << 5))) { + logprintf(0, "Process Error: No TSC support.\n"); + return false; + } + + // Check the highest extended function level supported. + // This check is valid for both Intel and AMD. + eax = 0x80000000; + cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x80000007) { + logprintf(0, "Process Error: No invariant TSC support.\n"); + return false; + } + + // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + // This check is valid for both Intel and AMD. + eax = 0x80000007; + cpuid(&eax, &ebx, &ecx, &edx); + if ((edx & (1 << 8)) == 0) { + logprintf(0, "Process Error: No non-stop TSC support.\n"); + return false; + } + + // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 + // This check is valid for both Intel and AMD. 
+ eax = 0x6; + cpuid(&eax, &ebx, &ecx, &edx); + if ((ecx & 1) == 0) { + logprintf(0, "Process Error: No APERF MSR support.\n"); + return false; + } + return true; +#else + logprintf(0, "Process Error: " + "cpu_freq_test is only supported on X86 processors.\n"); + return false; +#endif +} diff --git a/src/worker.h b/src/worker.h index 31e0225..6f9fde7 100644 --- a/src/worker.h +++ b/src/worker.h @@ -44,7 +44,7 @@ // Global Datastruture shared by the Cache Coherency Worker Threads. struct cc_cacheline_data { - int *num; + char *num; }; // Typical usage: @@ -127,10 +127,8 @@ class WorkerStatus { // ResumeWorkers() or StopWorkers() has been called. Number of distinct // calling threads must match the worker count (see AddWorkers() and // RemoveSelf()). - bool ContinueRunning(); + bool ContinueRunning(bool *paused); - // TODO(matthewb): Is this functionality really necessary? Remove it if not. - // // This is a hack! It's like ContinueRunning(), except it won't pause. If // any worker threads use this exclusively in place of ContinueRunning() then // PauseWorkers() should never be used! @@ -304,9 +302,10 @@ class WorkerThread { // do { // // work. // } while (IsReadyToRun()); - virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); } - // TODO(matthewb): Is this function really necessary? Remove it if not. - // + virtual bool IsReadyToRun(bool *paused = NULL) { + return worker_status_->ContinueRunning(paused); + } + // Like IsReadyToRun(), except it won't pause. 
virtual bool IsReadyToRunNoPause() { return worker_status_->ContinueRunningNoPause(); @@ -641,16 +640,27 @@ class CpuCacheCoherencyThread : public WorkerThread { CpuCacheCoherencyThread(cc_cacheline_data *cc_data, int cc_cacheline_count_, int cc_thread_num_, + int cc_thread_count_, int cc_inc_count_); virtual bool Work(); protected: + // Used by the simple random number generator as a shift feedback; + // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a + // psuedorandom cycle of period 2^64-1. + static const uint64 kRandomPolynomial = 0xD800000000000000ULL; + // A very simple psuedorandom generator that can be inlined and use + // registers, to keep the CC test loop tight and focused. + static uint64 SimpleRandom(uint64 seed); + cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline. int cc_local_num_; // Local counter for each thread. int cc_cacheline_count_; // Number of cache lines to operate on. int cc_thread_num_; // The integer id of the thread which is // used as an index into the integer array // of the cacheline datastructure. + int cc_thread_count_; // Total number of threads being run, for + // calculations mixing up cache line access. int cc_inc_count_; // Number of times to increment the counter. private: @@ -809,4 +819,80 @@ class MemoryRegionThread : public WorkerThread { DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread); }; +// Worker thread to check that the frequency of every cpu does not go below a +// certain threshold. +class CpuFreqThread : public WorkerThread { + public: + CpuFreqThread(int num_cpus, int freq_threshold, int round); + ~CpuFreqThread(); + + // This is the task function that the thread executes. + virtual bool Work(); + + // Returns true if this test can run on the current machine. Otherwise, + // returns false. + static bool CanRun(); + + private: + static const int kIntervalPause = 10; // The number of seconds to pause + // between acquiring the MSR data. 
+ static const int kStartupDelay = 5; // The number of seconds to wait + // before acquiring MSR data. + static const int kMsrTscAddr = 0x10; // The address of the TSC MSR. + static const int kMsrAperfAddr = 0xE8; // The address of the APERF MSR. + static const int kMsrMperfAddr = 0xE7; // The address of the MPERF MSR. + + // The index values into the CpuDataType.msr[] array. + enum MsrValues { + kMsrTsc = 0, // MSR index 0 = TSC. + kMsrAperf = 1, // MSR index 1 = APERF. + kMsrMperf = 2, // MSR index 2 = MPERF. + kMsrLast, // Last MSR index. + }; + + typedef struct { + uint32 msr; // The address of the MSR. + const char *name; // A human readable string for the MSR. + } CpuRegisterType; + + typedef struct { + uint64 msrs[kMsrLast]; // The values of the MSRs. + struct timeval tv; // The time at which the MSRs were read. + } CpuDataType; + + // The set of MSR addresses and register names. + static const CpuRegisterType kCpuRegisters[kMsrLast]; + + // Compute the change in values of the MSRs between current and previous, + // set the frequency in MHz of the cpu. If there is an error computing + // the delta, return false. Othewise, return true. + bool ComputeFrequency(CpuDataType *current, CpuDataType *previous, + int *frequency); + + // Get the MSR values for this particular cpu and save them in data. If + // any error is encountered, returns false. Otherwise, returns true. + bool GetMsrs(int cpu, CpuDataType *data); + + // Compute the difference between the currently read MSR values and the + // previously read values and store the results in delta. If any of the + // values did not increase, or the TSC value is too small, returns false. + // Otherwise, returns true. + bool ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta); + + // The total number of cpus on the system. + int num_cpus_; + + // The minimum frequency that each cpu must operate at (in MHz). + int freq_threshold_; + + // The value to round the computed frequency to. 
+ int round_; + + // Precomputed value to add to the frequency to do the rounding. + double round_value_; + + DISALLOW_COPY_AND_ASSIGN(CpuFreqThread); +}; + #endif // STRESSAPPTEST_WORKER_H_ diff --git a/stressapptest.1 b/stressapptest.1 index 695f9ee..2c91478 100644 --- a/stressapptest.1 +++ b/stressapptest.1 @@ -86,9 +86,14 @@ Number of times to increment the cacheline's member. .TP .B \-\-cc_line_count -Mumber of cache line sized datastructures to allocate for the cache coherency +Number of cache line sized datastructures to allocate for the cache coherency threads to operate. +.TP +.B \-\-cc_line_size +Size of cache line to use as the basis for cache coherency test data +structures. + .TP .B \-\-cc_test Do the cache coherency testing.