From 6d1e64db329883e43dbca06471c093fc23dc9a2e Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Fri, 14 May 2010 03:47:11 +0000 Subject: [PATCH] Update stressapptest to 1.0.3. * /dev/shm support to allow >1.4G memory usage for 32 bit app. * Some arm support. * x86 SSE support. --- configure | 205 +++++++++++---------------- configure.ac | 149 ++++++++++--------- src/adler32memcpy.cc | 124 +++++++++------- src/disk_blocks.h | 2 +- src/finelock_queue.cc | 12 +- src/finelock_queue.h | 6 +- src/logger.cc | 5 +- src/logger.h | 2 +- src/os.cc | 259 ++++++++++++++++++++++++++++------ src/os.h | 21 ++- src/pattern.cc | 2 +- src/pattern.h | 2 +- src/sat.cc | 31 +--- src/sat.h | 2 +- src/stressapptest_config.h.in | 3 + src/worker.cc | 28 ++-- 16 files changed, 511 insertions(+), 342 deletions(-) diff --git a/configure b/configure index eda792f..59d076e 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.61 for stressapptest 1.0.2_autoconf. +# Generated by GNU Autoconf 2.61 for stressapptest 1.0.3_autoconf. # # Report bugs to . # @@ -574,8 +574,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='stressapptest' PACKAGE_TARNAME='stressapptest' -PACKAGE_VERSION='1.0.2_autoconf' -PACKAGE_STRING='stressapptest 1.0.2_autoconf' +PACKAGE_VERSION='1.0.3_autoconf' +PACKAGE_STRING='stressapptest 1.0.3_autoconf' PACKAGE_BUGREPORT='opensource@google.com' ac_unique_file="src/" @@ -1228,7 +1228,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures stressapptest 1.0.2_autoconf to adapt to many kinds of systems. +\`configure' configures stressapptest 1.0.3_autoconf to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1299,7 +1299,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of stressapptest 1.0.2_autoconf:";; + short | recursive ) echo "Configuration of stressapptest 1.0.3_autoconf:";; esac cat <<\_ACEOF @@ -1385,7 +1385,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -stressapptest configure 1.0.2_autoconf +stressapptest configure 1.0.3_autoconf generated by GNU Autoconf 2.61 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1399,7 +1399,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by stressapptest $as_me 1.0.2_autoconf, which was +It was created by stressapptest $as_me 1.0.3_autoconf, which was generated by GNU Autoconf 2.61. Invocation command line was $ $0 $@ @@ -1959,70 +1959,82 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- case x"$target_cpu" in -"xx86_64") + "xx86_64") cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_CPU_X86_64 _ACEOF -;; -"xi686") + ;; + "xi686") cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_CPU_I686 _ACEOF -;; -"xpowerpc") + ;; + "xpowerpc") cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_CPU_PPC _ACEOF -;; + ;; + "xarmv7a") + +cat >>confdefs.h <<\_ACEOF +#define STRESSAPPTEST_CPU_ARMV7A +_ACEOF + + ;; + *) + { { echo "$as_me:$LINENO: error: $target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a" >&5 +echo "$as_me: error: $target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a" >&2;} + { (exit 1); exit 1; }; } + ;; esac _os=`uname` ## The following allows like systems to share settings. This is not meant to ## imply that these OS are the same thing. 
From OpenOffice dmake configure.in case "$_os" in - "Linux") - OS_VERSION=linux + "Linux") + OS_VERSION=linux cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_OS_LINUX _ACEOF - ;; - "Darwin") - OS_VERSION=macosx + ;; + "Darwin") + OS_VERSION=macosx cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_OS_DARWIN _ACEOF - ;; - "FreeBSD") - OS_VERSION=bsd + ;; + "FreeBSD") + OS_VERSION=bsd cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_OS_BSD _ACEOF - ;; - "NetBSD") - OS_VERSION=bsd + ;; + "NetBSD") + OS_VERSION=bsd cat >>confdefs.h <<\_ACEOF #define STRESSAPPTEST_OS_BSD _ACEOF - ;; - *) - { { echo "$as_me:$LINENO: error: $_os operating system is not suitable to build dmake!" >&5 + ;; + *) + { { echo "$as_me:$LINENO: error: $_os operating system is not suitable to build dmake!" >&5 echo "$as_me: error: $_os operating system is not suitable to build dmake!" >&2;} { (exit 1); exit 1; }; } - ;; + ;; esac am__api_version='1.10' @@ -2331,7 +2343,7 @@ fi # Define the identity of the package. PACKAGE='stressapptest' - VERSION='1.0.2_autoconf' + VERSION='1.0.3_autoconf' cat >>confdefs.h <<_ACEOF @@ -4074,7 +4086,7 @@ _ACEOF #Default cxxflags CXXFLAGS="$CXXFLAGS -DCHECKOPTS" -CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall" +CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi" CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" # Checks for header files. @@ -6015,10 +6027,17 @@ _ACEOF fi + +# These are the libraries stressapptest requires to build. +# We'll check that they work, and fail otherwise. +# In the future we may provide for testing alternate +# arguments, but that's not necessary now. +LIBS="$LIBS -static -lrt -pthread -laio" + # Checking for pthreads pthread_arg="not_available" -{ echo "$as_me:$LINENO: checking which argument is required to compile pthreads" >&5 -echo $ECHO_N "checking which argument is required to compile pthreads... 
$ECHO_C" >&6; } +{ echo "$as_me:$LINENO: checking if pthreads is supported" >&5 +echo $ECHO_N "checking if pthreads is supported... $ECHO_C" >&6; } pthread_header="#include" pthread_body="pthread_create(0,0,0,0)" @@ -6068,79 +6087,20 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test x"$pthread_arg" = x"not_available"; then - # At first, only -pthread was tested, but this is the place - # to add extra pthread flags if someone can test them - bkp_LDFLAGS="$LDFLAGS" - for altheader in -pthread; do - LDFLAGS="$bkp_LDFLAGS $altheader" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$pthread_header -int -main () -{ -$pthread_body - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && - $as_test_x conftest$ac_exeext; then - pthread_arg="$altheader" -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ - conftest$ac_exeext conftest.$ac_ext - LDFLAGS="$bkp_LDFLAGS" - done -fi - -if test x"$pthread_arg" = x"not_available"; then - { { echo "$as_me:$LINENO: error: Cannot find a proper pthread library + { { echo "$as_me:$LINENO: error: Cannot find a proper pthread library See \`config.log' for more details." >&5 echo "$as_me: error: Cannot find a proper pthread library See \`config.log' for more details." 
>&2;} { (exit 1); exit 1; }; } else - if test x"$pthread_arg" = x; then - { echo "$as_me:$LINENO: result: none" >&5 -echo "${ECHO_T}none" >&6; } - else - { echo "$as_me:$LINENO: result: $pthread_arg" >&5 -echo "${ECHO_T}$pthread_arg" >&6; } - fi - LDFLAGS="$LDFLAGS $pthread_arg" + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } fi # Checking for libaio libaio_arg="not_available" -{ echo "$as_me:$LINENO: checking which argument is required to compile libaio" >&5 -echo $ECHO_N "checking which argument is required to compile libaio... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: checking if libaio is supported" >&5 +echo $ECHO_N "checking if libaio is supported... $ECHO_C" >&6; } libaio_header="#include" libaio_body="io_submit(0,0,0)" @@ -6190,20 +6150,35 @@ rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test x"$libaio_arg" = x"not_available"; then - bkp_LDFLAGS="$LDFLAGS" - for altheader in -laio; do - LDFLAGS="$bkp_LDFLAGS $altheader" - cat >conftest.$ac_ext <<_ACEOF + { { echo "$as_me:$LINENO: error: Cannot find libaio library, please install libaio-dev +See \`config.log' for more details." >&5 +echo "$as_me: error: Cannot find libaio library, please install libaio-dev +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +else + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +fi + +# Checking for librt +librt_arg="not_available" +{ echo "$as_me:$LINENO: checking if librt is supported" >&5 +echo $ECHO_N "checking if librt is supported... $ECHO_C" >&6; } + +librt_header="#include" +librt_body="shm_open(0, 0, 0)" +# Check if compile with no extra argument +cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ -$libaio_header +$librt_header int main () { -$libaio_body +$librt_body ; return 0; } @@ -6226,7 +6201,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then - libaio_arg="$altheader" + librt_arg="" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 @@ -6236,27 +6211,19 @@ fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext - LDFLAGS="$bkp_LDFLAGS" - done -fi -if test x"$libaio_arg" = x"not_available"; then - { { echo "$as_me:$LINENO: error: Cannot find libaio library, please install libaio-dev +if test x"$librt_arg" = x"not_available"; then + { { echo "$as_me:$LINENO: error: Cannot find librt library See \`config.log' for more details." >&5 -echo "$as_me: error: Cannot find libaio library, please install libaio-dev +echo "$as_me: error: Cannot find librt library See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } else - if test x"$libaio_arg" = x; then - { echo "$as_me:$LINENO: result: none" >&5 -echo "${ECHO_T}none" >&6; } - else - { echo "$as_me:$LINENO: result: $libaio_arg" >&5 -echo "${ECHO_T}$libaio_arg" >&6; } - fi - LDFLAGS="$LDFLAGS $libaio_arg" + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } fi + # Checks for library functions. { echo "$as_me:$LINENO: checking whether closedir returns void" >&5 echo $ECHO_N "checking whether closedir returns void... $ECHO_C" >&6; } @@ -7662,7 +7629,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by stressapptest $as_me 1.0.2_autoconf, which was +This file was extended by stressapptest $as_me 1.0.3_autoconf, which was generated by GNU Autoconf 2.61. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7715,7 +7682,7 @@ Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -stressapptest config.status 1.0.2_autoconf +stressapptest config.status 1.0.3_autoconf configured by $0, generated by GNU Autoconf 2.61, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 5011c89..38dc59c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ AC_PREREQ(2.61) -AC_INIT(stressapptest, 1.0.2_autoconf, opensource@google.com) +AC_INIT(stressapptest, 1.0.3_autoconf, opensource@google.com) AC_CANONICAL_HOST AC_CANONICAL_BUILD @@ -7,47 +7,54 @@ AC_CANONICAL_BUILD # for the different platforms AC_CANONICAL_TARGET case x"$target_cpu" in -"xx86_64") - AC_DEFINE([STRESSAPPTEST_CPU_X86_64],[], - [Defined if the target CPU is x86_64]) -;; -"xi686") - AC_DEFINE([STRESSAPPTEST_CPU_I686],[], - [Defined if the target CPU is i686]) -;; -"xpowerpc") - AC_DEFINE([STRESSAPPTEST_CPU_PPC],[], - [Defined if the target CPU is PowerPC]) -;; + "xx86_64") + AC_DEFINE([STRESSAPPTEST_CPU_X86_64],[], + [Defined if the target CPU is x86_64]) + ;; + "xi686") + AC_DEFINE([STRESSAPPTEST_CPU_I686],[], + [Defined if the target CPU is i686]) + ;; + "xpowerpc") + AC_DEFINE([STRESSAPPTEST_CPU_PPC],[], + [Defined if the target CPU is PowerPC]) + ;; + "xarmv7a") + AC_DEFINE([STRESSAPPTEST_CPU_ARMV7A],[], + [Defined if the target CPU is armv7a]) + ;; + *) + AC_MSG_ERROR([$target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a]) + ;; esac _os=`uname` ## The following allows like systems to share settings. This is not meant to ## imply that these OS are the same thing. 
From OpenOffice dmake configure.in case "$_os" in - "Linux") - OS_VERSION=linux - AC_DEFINE([STRESSAPPTEST_OS_LINUX],[], - [Defined if the target OS is Linux]) - ;; - "Darwin") - OS_VERSION=macosx - AC_DEFINE([STRESSAPPTEST_OS_DARWIN],[], - [Defined if the target OS is OSX]) - ;; - "FreeBSD") - OS_VERSION=bsd - AC_DEFINE([STRESSAPPTEST_OS_BSD],[], - [Defined if the target OS is BSD based]) - ;; - "NetBSD") - OS_VERSION=bsd - AC_DEFINE([STRESSAPPTEST_OS_BSD],[], - [Defined if the target OS is BSD based]) - ;; - *) - AC_MSG_ERROR([$_os operating system is not suitable to build dmake!]) - ;; + "Linux") + OS_VERSION=linux + AC_DEFINE([STRESSAPPTEST_OS_LINUX],[], + [Defined if the target OS is Linux]) + ;; + "Darwin") + OS_VERSION=macosx + AC_DEFINE([STRESSAPPTEST_OS_DARWIN],[], + [Defined if the target OS is OSX]) + ;; + "FreeBSD") + OS_VERSION=bsd + AC_DEFINE([STRESSAPPTEST_OS_BSD],[], + [Defined if the target OS is BSD based]) + ;; + "NetBSD") + OS_VERSION=bsd + AC_DEFINE([STRESSAPPTEST_OS_BSD],[], + [Defined if the target OS is BSD based]) + ;; + *) + AC_MSG_ERROR([$_os operating system is not suitable to build dmake!]) + ;; esac AM_INIT_AUTOMAKE([-Wall -Werror foreign]) @@ -75,12 +82,12 @@ AC_MSG_CHECKING([current timestamp]) AC_MSG_RESULT([$timestamp]) AC_DEFINE_UNQUOTED([STRESSAPPTEST_TIMESTAMP], - "$username @ $hostname on $timestamp", - [Timestamp when ./configure was executed]) + "$username @ $hostname on $timestamp", + [Timestamp when ./configure was executed]) #Default cxxflags CXXFLAGS="$CXXFLAGS -DCHECKOPTS" -CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall" +CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi" CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" # Checks for header files. @@ -100,9 +107,16 @@ AC_HEADER_TIME AC_TYPE_UINT16_T AC_C_VOLATILE + +# These are the libraries stressapptest requires to build. +# We'll check that they work, and fail otherwise. 
+# In the future we may provide for testing alternate +# arguments, but that's not necessary now. +LIBS="$LIBS -static -lrt -pthread -laio" + # Checking for pthreads pthread_arg="not_available" -AC_MSG_CHECKING([which argument is required to compile pthreads]) +AC_MSG_CHECKING([if pthreads is supported]) pthread_header="#include" pthread_body="pthread_create(0,0,0,0)" @@ -111,31 +125,14 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM($pthread_header, $pthread_body)], pthread_arg="") if test x"$pthread_arg" = x"not_available"; then - # At first, only -pthread was tested, but this is the place - # to add extra pthread flags if someone can test them - bkp_LDFLAGS="$LDFLAGS" - for altheader in -pthread; do - LDFLAGS="$bkp_LDFLAGS $altheader" - AC_LINK_IFELSE([AC_LANG_PROGRAM($pthread_header, $pthread_body)], - pthread_arg="$altheader") - LDFLAGS="$bkp_LDFLAGS" - done -fi - -if test x"$pthread_arg" = x"not_available"; then - AC_MSG_FAILURE([Cannot find a proper pthread library]) + AC_MSG_FAILURE([Cannot find a proper pthread library]) else - if test x"$pthread_arg" = x; then - AC_MSG_RESULT([none]) - else - AC_MSG_RESULT([$pthread_arg]) - fi - LDFLAGS="$LDFLAGS $pthread_arg" + AC_MSG_RESULT([yes]) fi # Checking for libaio libaio_arg="not_available" -AC_MSG_CHECKING([which argument is required to compile libaio]) +AC_MSG_CHECKING([if libaio is supported]) libaio_header="#include" libaio_body="io_submit(0,0,0)" @@ -144,26 +141,28 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM($libaio_header, $libaio_body)], libaio_arg="") if test x"$libaio_arg" = x"not_available"; then - bkp_LDFLAGS="$LDFLAGS" - for altheader in -laio; do - LDFLAGS="$bkp_LDFLAGS $altheader" - AC_LINK_IFELSE([AC_LANG_PROGRAM($libaio_header, $libaio_body)], - libaio_arg="$altheader") - LDFLAGS="$bkp_LDFLAGS" - done + AC_MSG_FAILURE([Cannot find libaio library, please install libaio-dev]) +else + AC_MSG_RESULT([yes]) fi -if test x"$libaio_arg" = x"not_available"; then - AC_MSG_FAILURE([Cannot find libaio library, please install 
libaio-dev]) +# Checking for librt +librt_arg="not_available" +AC_MSG_CHECKING([if librt is supported]) + +librt_header="#include" +librt_body="shm_open(0, 0, 0)" +# Check if compile with no extra argument +AC_LINK_IFELSE([AC_LANG_PROGRAM($librt_header, $librt_body)], +librt_arg="") + +if test x"$librt_arg" = x"not_available"; then + AC_MSG_FAILURE([Cannot find librt library]) else - if test x"$libaio_arg" = x; then - AC_MSG_RESULT([none]) - else - AC_MSG_RESULT([$libaio_arg]) - fi - LDFLAGS="$LDFLAGS $libaio_arg" + AC_MSG_RESULT([yes]) fi + # Checks for library functions. AC_FUNC_CLOSEDIR_VOID AC_PROG_GCC_TRADITIONAL diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc index 529dcc4..69324f7 100644 --- a/src/adler32memcpy.cc +++ b/src/adler32memcpy.cc @@ -225,19 +225,41 @@ bool AdlerMemcpyWarmC(uint64 *dstmem64, uint64 *srcmem64, // x86_64 SSE2 assembly implementation of fast and stressful Adler memory copy. bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, unsigned int size_in_bytes, AdlerChecksum *checksum) { -// Use assembly implementation only with 64bit compilation. -#ifndef STRESSAPPTEST_CPU_X86_64 - // Fall back to C implementation for 32bit compilation. - return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); -#else +// Use assembly implementation where supported. +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + +// Pull a bit of tricky preprocessing to make the inline asm both +// 32 bit and 64 bit. +#ifdef STRESSAPPTEST_CPU_I686 // Instead of coding both, x86... +#define rAX "%%eax" +#define rCX "%%ecx" +#define rDX "%%edx" +#define rBX "%%ebx" +#define rSP "%%esp" +#define rBP "%%ebp" +#define rSI "%%esi" +#define rDI "%%edi" +#endif + +#ifdef STRESSAPPTEST_CPU_X86_64 // ...and x64, we use rXX macros. 
+#define rAX "%%rax" +#define rCX "%%rcx" +#define rDX "%%rdx" +#define rBX "%%rbx" +#define rSP "%%rsp" +#define rBP "%%rbp" +#define rSI "%%rsi" +#define rDI "%%rdi" +#endif + // Elements 0 to 3 are used for holding checksum terms a1, a2, // b1, b2 respectively. These elements are filled by asm code. // Elements 4 and 5 are used by asm code to for ANDing MMX data and removing // 2 words from each MMX register (A MMX reg has 4 words, by ANDing we are // setting word index 0 and word index 2 to zero). // Element 6 and 7 are used for setting a1 and a2 to 1. - volatile uint64 checksum_arr[] = {0, 0, 0, 0, - 0x00000000ffffffffUL, 0x00000000ffffffffUL, 1, 1}; + volatile uint64 checksum_arr[] __attribute__ ((aligned(16))) = + {0, 0, 0, 0, 0x00000000ffffffffUL, 0x00000000ffffffffUL, 1, 1}; if ((size_in_bytes >> 19) > 0) { // Size is too large. Must be less than 2^19 bytes = 512 KB. @@ -245,23 +267,24 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, } // Number of 32-bit words which are not added to a1/a2 in the main loop. - uint64 remaining_words = (size_in_bytes % 48) / 4; + uint32 remaining_words = (size_in_bytes % 48) / 4; // Since we are moving 48 bytes at a time number of iterations = total size/48 // is value of counter. - uint64 num_of_48_byte_units = size_in_bytes / 48; + uint32 num_of_48_byte_units = size_in_bytes / 48; - asm volatile( + asm volatile ( // Source address is in ESI (extended source index) // destination is in EDI (extended destination index) - // and counter is already in ECX (extended counter index). - "cmp $0, %%ecx;" // Compare counter to zero. + // and counter is already in ECX (extended counter + // index). + "cmp $0, " rCX ";" // Compare counter to zero. "jz END;" // XMM6 is initialized with 1 and XMM7 with 0. 
- "prefetchnta 0(%%rsi);" - "prefetchnta 64(%%rsi);" - "movdqu 48(%%rax), %%xmm6;" + "prefetchnta 0(" rSI ");" + "prefetchnta 64(" rSI ");" + "movdqu 48(" rAX "), %%xmm6;" "xorps %%xmm7, %%xmm7;" // Start of the loop which copies 48 bytes from source to dst each time. @@ -269,28 +292,28 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // Make 6 moves each of 16 bytes from srcmem to XMM registers. // We are using 2 words out of 4 words in each XMM register, - // word index 0 and word index 2) - "movdqa 0(%%rsi), %%xmm0;" - "movdqu 4(%%rsi), %%xmm1;" // Be careful to use unaligned move here. - "movdqa 16(%%rsi), %%xmm2;" - "movdqu 20(%%rsi), %%xmm3;" - "movdqa 32(%%rsi), %%xmm4;" - "movdqu 36(%%rsi), %%xmm5;" + // word index 0 and word index 2 + "movdqa 0(" rSI "), %%xmm0;" + "movdqu 4(" rSI "), %%xmm1;" // Be careful to use unaligned move here. + "movdqa 16(" rSI "), %%xmm2;" + "movdqu 20(" rSI "), %%xmm3;" + "movdqa 32(" rSI "), %%xmm4;" + "movdqu 36(" rSI "), %%xmm5;" // Move 3 * 16 bytes from XMM registers to dstmem. // Note: this copy must be performed before pinsrw instructions since // they will modify the XMM registers. - "movntdq %%xmm0, 0(%%rdi);" - "movntdq %%xmm2, 16(%%rdi);" - "movntdq %%xmm4, 32(%%rdi);" + "movntdq %%xmm0, 0(" rDI ");" + "movntdq %%xmm2, 16(" rDI ");" + "movntdq %%xmm4, 32(" rDI ");" // Sets the word[1] and word[3] of XMM0 to XMM5 to zero. - "andps 32(%%rax), %%xmm0;" - "andps 32(%%rax), %%xmm1;" - "andps 32(%%rax), %%xmm2;" - "andps 32(%%rax), %%xmm3;" - "andps 32(%%rax), %%xmm4;" - "andps 32(%%rax), %%xmm5;" + "andps 32(" rAX "), %%xmm0;" + "andps 32(" rAX "), %%xmm1;" + "andps 32(" rAX "), %%xmm2;" + "andps 32(" rAX "), %%xmm3;" + "andps 32(" rAX "), %%xmm4;" + "andps 32(" rAX "), %%xmm5;" // Add XMM0 to XMM6 and then add XMM6 to XMM7. // Repeat this for XMM1, ..., XMM5. 
@@ -311,43 +334,43 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, "paddq %%xmm6, %%xmm7;" // Increment ESI and EDI by 48 bytes and decrement counter by 1. - "add $48, %%rsi;" - "add $48, %%rdi;" - "prefetchnta 0(%%rsi);" - "prefetchnta 64(%%rsi);" - "dec %%rcx;" + "add $48, " rSI ";" + "add $48, " rDI ";" + "prefetchnta 0(" rSI ");" + "prefetchnta 64(" rSI ");" + "dec " rCX ";" "jnz TOP;" // Now only remaining_words 32-bit words are left. // make a loop, add first two words to a1 and next two to a2 (just like // above loop, the only extra thing we are doing is rechecking - // %rdx (=remaining_words) everytime we add a number to a1/a2. + // rDX (=remaining_words) everytime we add a number to a1/a2. "REM_IS_STILL_NOT_ZERO:\n" // Unless remaining_words becomes less than 4 words(16 bytes) // there is not much issue and remaining_words will always // be a multiple of four by assumption. - "cmp $4, %%rdx;" + "cmp $4, " rDX ";" // In case for some weird reasons if remaining_words becomes // less than 4 but not zero then also break the code and go off to END. "jl END;" // Otherwise just go on and copy data in chunks of 4-words at a time till // whole data (<48 bytes) is copied. - "movdqa 0(%%rsi), %%xmm0;" // Copy next 4-words to XMM0 and to XMM1. + "movdqa 0(" rSI "), %%xmm0;" // Copy next 4-words to XMM0 and to XMM1. - "movdqa 0(%%rsi), %%xmm5;" // Accomplish movdqu 4(%%rsi) without + "movdqa 0(" rSI "), %%xmm5;" // Accomplish movdqu 4(%rSI) without "pshufd $0x39, %%xmm5, %%xmm1;" // indexing off memory boundary. - "movntdq %%xmm0, 0(%%rdi);" // Copy 4-words to destination. - "andps 32(%%rax), %%xmm0;" - "andps 32(%%rax), %%xmm1;" + "movntdq %%xmm0, 0(" rDI ");" // Copy 4-words to destination. 
+ "andps 32(" rAX "), %%xmm0;" + "andps 32(" rAX "), %%xmm1;" "paddq %%xmm0, %%xmm6;" "paddq %%xmm6, %%xmm7;" "paddq %%xmm1, %%xmm6;" "paddq %%xmm6, %%xmm7;" - "add $16, %%rsi;" - "add $16, %%rdi;" - "sub $4, %%rdx;" - // Decrement %%rdx by 4 since %%rdx is number of 32-bit + "add $16, " rSI ";" + "add $16, " rDI ";" + "sub $4, " rDX ";" + // Decrement %rDX by 4 since %rDX is number of 32-bit // words left after considering all 48-byte units. "jmp REM_IS_STILL_NOT_ZERO;" @@ -356,8 +379,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // 64 bit numbers and have to be converted to 64 bit numbers) // seems like Adler128 (since size of each part is 4 byte rather than // 1 byte). - "movdqa %%xmm6, 0(%%rax);" - "movdqa %%xmm7, 16(%%rax);" + "movdqa %%xmm6, 0(" rAX ");" + "movdqa %%xmm7, 16(" rAX ");" "sfence;" // No output registers. @@ -376,5 +399,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // that there is no problem with memory this just mean that data was copied // from src to dst and checksum was calculated successfully). return true; +#else + // Fall back to C implementation for anything else. + return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); #endif } diff --git a/src/disk_blocks.h b/src/disk_blocks.h index f4ca93f..cb634c9 100644 --- a/src/disk_blocks.h +++ b/src/disk_blocks.h @@ -100,7 +100,7 @@ class DiskBlockTable { typedef vector PosToAddrVector; PosToAddrVector pos_to_addr_; AddrToBlockMap addr_to_block_; - int64 nelems_; + uint64 nelems_; int sector_size_; // Sector size, in bytes int write_block_size_; // Block size, in bytes string device_name_; // Device name diff --git a/src/finelock_queue.cc b/src/finelock_queue.cc index 569903a..8d914b8 100644 --- a/src/finelock_queue.cc +++ b/src/finelock_queue.cc @@ -45,7 +45,7 @@ FineLockPEQueue::FineLockPEQueue( queue_metric_ = kTouch; { // Init all the page locks. 
- for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { pthread_mutex_init(&(pagelocks_[i]), NULL); // Pages start out owned (locked) by Sat::InitializePages. // A locked state indicates that the page state is unknown, @@ -147,7 +147,7 @@ int64 FineLockPEQueue::getC(int64 m) { // Destructor: Clean-up allocated memory and destroy pthread locks. FineLockPEQueue::~FineLockPEQueue() { - int64 i; + uint64 i; for (i = 0; i < q_size_; i++) pthread_mutex_destroy(&(pagelocks_[i])); delete[] pagelocks_; @@ -173,11 +173,11 @@ bool FineLockPEQueue::QueueAnalysis() { } // Bucketize the page counts by highest bit set. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint32 readcount = pages_[i].touch; int b = 0; for (b = 0; b < 31; b++) { - if (readcount < (1 << b)) + if (readcount < (1u << b)) break; } @@ -271,7 +271,7 @@ bool FineLockPEQueue::GetPageFromPhysical(uint64 paddr, struct page_entry *pe) { // Traverse through array until finding a page // that contains the address we want.. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint64 page_addr = pages_[i].paddr; // This assumes linear vaddr. if ((page_addr <= paddr) && (page_addr + page_size_ > paddr)) { @@ -335,7 +335,7 @@ bool FineLockPEQueue::GetRandomWithPredicateTag(struct page_entry *pe, uint64 next_try = 1; // Traverse through array until finding a page meeting given predicate. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint64 index = (next_try + first_try) % q_size_; // Go through the loop linear conguentially. We are offsetting by // 'first_try' so this path will be a different sequence for every diff --git a/src/finelock_queue.h b/src/finelock_queue.h index 54b154e..2de5a46 100644 --- a/src/finelock_queue.h +++ b/src/finelock_queue.h @@ -57,7 +57,9 @@ class FineLockPEQueue { uint64 GetRandom64FromSlot(int slot); // Helper function to check index range, returns true if index is valid. 
- bool valid_index(int64 index) { return index >= 0 && index < q_size_; } + bool valid_index(int64 index) { + return index >= 0 && static_cast<uint64>(index) < q_size_; + } // Returns true if page entry is valid, false otherwise. static bool page_is_valid(struct page_entry *pe) { @@ -85,7 +87,7 @@ class FineLockPEQueue { pthread_mutex_t *pagelocks_; // Per-page-entry locks. struct page_entry *pages_; // Where page entries are held. - int64 q_size_; // Size of the queue. + uint64 q_size_; // Size of the queue. int64 page_size_; // For calculating array index from offset. enum { diff --git a/src/logger.cc b/src/logger.cc index 81f1e3e..e4ecb03 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -38,7 +38,7 @@ void Logger::VLogF(int priority, const char *format, va_list args) { } char buffer[4096]; int length = vsnprintf(buffer, sizeof buffer, format, args); - if (length >= sizeof buffer) { + if (static_cast<size_t>(length) >= sizeof buffer) { length = sizeof buffer; buffer[sizeof buffer - 1] = '\n'; } @@ -96,7 +96,8 @@ void Logger::QueueLogLine(string *line) { namespace { void WriteToFile(const string& line, int fd) { - LOGGER_ASSERT(write(fd, line.data(), line.size()) == line.size()); + LOGGER_ASSERT(write(fd, line.data(), line.size()) == + static_cast<ssize_t>(line.size())); } } diff --git a/src/logger.h b/src/logger.h index 3eaea57..1d70107 100644 --- a/src/logger.h +++ b/src/logger.h @@ -28,7 +28,7 @@ // Attempts to log additional lines will block when the queue reaches this size. // Due to how the logging thread works, up to twice this many log lines may be // outstanding at any point. -static const int kMaxQueueSize = 250; +static const size_t kMaxQueueSize = 250; // This is only for use by the Logger class, do not use it elsewhere!
diff --git a/src/os.cc b/src/os.cc index 4784028..1340d6b 100644 --- a/src/os.cc +++ b/src/os.cc @@ -53,8 +53,12 @@ OsLayer::OsLayer() { testmemsize_ = 0; totalmemsize_ = 0; min_hugepages_bytes_ = 0; - error_injection_ = false; normal_mem_ = true; + use_hugepages_ = false; + use_posix_shm_ = false; + dynamic_mapped_shmem_ = false; + shmid_ = 0; + time_initialized_ = 0; regionsize_ = 0; @@ -64,6 +68,13 @@ OsLayer::OsLayer() { num_cpus_per_node_ = 0; error_diagnoser_ = 0; err_log_callback_ = 0; + error_injection_ = false; + + void *pvoid = 0; + address_mode_ = sizeof(pvoid) * 8; + + has_clflush_ = false; + has_sse2_ = false; } // OsLayer cleanup. @@ -75,8 +86,9 @@ OsLayer::~OsLayer() { // OsLayer initialization. bool OsLayer::Initialize() { time_initialized_ = time(NULL); - use_hugepages_ = false; - shmid_ = 0; + // Detect asm support. + GetFeatures(); + if (num_cpus_ == 0) { num_nodes_ = 1; num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN); @@ -129,13 +141,53 @@ list OsLayer::FindFileDevices() { return locations; } + +// Get HW core features from cpuid instruction. +void OsLayer::GetFeatures() { +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // CPUID features documented at: + // http://www.sandpile.org/ia32/cpuid.htm + int ax, bx, cx, dx; + __asm__ __volatile__ ( + "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1)); + has_clflush_ = (dx >> 19) & 1; + has_sse2_ = (dx >> 26) & 1; + + logprintf(9, "Log: has clflush: %s, has sse2: %s\n", + has_clflush_ ? "true" : "false", + has_sse2_ ? "true" : "false"); +#elif defined(STRESSAPPTEST_CPU_PPC) + // All PPC implementations have cache flush instructions. + has_clflush_ = true; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) +#warning "Unsupported CPU type ARMV7A: unable to determine feature set." +#else +#warning "Unsupported CPU type: unable to determine feature set." +#endif +} + + // We need to flush the cacheline here. void OsLayer::Flush(void *vaddr) { // Use the generic flush. 
This function is just so we can override // this if we are so inclined. - FastFlush(vaddr); + if (has_clflush_) + FastFlush(vaddr); +} + + +// Run C or ASM copy as appropriate. +bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, + unsigned int size_in_bytes, + AdlerChecksum *checksum) { + if (has_sse2_) { + return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum); + } else { + return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum); + } } + // Translate user virtual to physical address. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { char tmpbuf[256]; @@ -317,65 +369,155 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { // Try hugepages first. void *buf = 0; + sat_assert(length >= 0); + if (paddr_base) logprintf(0, "Process Error: non zero paddr_base %#llx is not supported," " ignore.\n", paddr_base); - { // Allocate hugepage mapped memory. - int shmid; - void *shmaddr; - - if ((shmid = shmget(2, length, - SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { - int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(12, "Log: failed to allocate shared mem object - err %d (%s)\n", - err, errtxt); - goto hugepage_failover; - } + // Determine optimal memory allocation path. + bool prefer_hugepages = false; + bool prefer_posix_shm = false; + bool prefer_dynamic_mapping = false; - shmaddr = shmat(shmid, NULL, NULL); - if (shmaddr == reinterpret_cast<void*>(-1)) { - int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n", - err, errtxt); - if (shmctl(shmid, IPC_RMID, NULL) < 0) { + // Are there enough hugepages? + int64 hugepagesize = FindHugePages() * 2 * kMegabyte; + // TODO(nsanders): Is there enough /dev/shm? Is there enough free memory?
+ if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) { + prefer_dynamic_mapping = true; + prefer_posix_shm = true; + logprintf(3, "Log: Prefer POSIX shared memory allocation.\n"); + logprintf(3, "Log: You may need to run " + "'sudo mount -o remount,size=100\% /dev/shm.'\n"); + } else if (hugepagesize >= length) { + prefer_hugepages = true; + logprintf(3, "Log: Prefer using hugepace allocation.\n"); + } else { + logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); + } + + // Allocate hugepage mapped memory. + if (prefer_hugepages) { + do { // Allow break statement. + int shmid; + void *shmaddr; + + if ((shmid = shmget(2, length, + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(0, "Log: failed to remove shared mem object - err %d (%s).\n", - err, errtxt); + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to allocate shared hugepage " + "object - err %d (%s)\n", + err, errtxt.c_str()); + logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n"); + break; } - goto hugepage_failover; - } - use_hugepages_ = true; - shmid_ = shmid; - buf = shmaddr; - logprintf(0, "Log: Using hugepages 0x%x at %p.\n", shmid, shmaddr); + + shmaddr = shmat(shmid, NULL, NULL); + if (shmaddr == reinterpret_cast<void*>(-1)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to attach shared " + "hugepage object - err %d (%s).\n", + err, errtxt.c_str()); + if (shmctl(shmid, IPC_RMID, NULL) < 0) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to remove shared " + "hugepage object - err %d (%s).\n", + err, errtxt.c_str()); + } + break; + } + use_hugepages_ = true; + shmid_ = shmid; + buf = shmaddr; + logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n", + shmid, shmaddr); + } while (0); } - hugepage_failover: + if ((!use_hugepages_) && prefer_posix_shm) { + do { + int shm_object; +
void *shmaddr = NULL; + + shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU); + if (shm_object < 0) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to allocate shared " + "smallpage object - err %d (%s)\n", + err, errtxt.c_str()); + break; + } + + if (0 > ftruncate(shm_object, length)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to ftruncate shared " + "smallpage object - err %d (%s)\n", + err, errtxt.c_str()); + break; + } + + // 32 bit linux apps can only use ~1.4G of address space. + // Use dynamic mapping for allocations larger than that. + // Currently perf hit is ~10% for this. + if (prefer_dynamic_mapping) { + dynamic_mapped_shmem_ = true; + } else { + // Do a full mapping here otherwise. + shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, + shm_object, NULL); + if (shmaddr == reinterpret_cast(-1)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to map shared " + "smallpage object - err %d (%s).\n", + err, errtxt.c_str()); + break; + } + } + + use_posix_shm_ = true; + shmid_ = shm_object; + buf = shmaddr; + char location_message[256] = ""; + if (dynamic_mapped_shmem_) { + sprintf(location_message, "mapped as needed"); + } else { + sprintf(location_message, "at %p", shmaddr); + } + logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n", + shm_object, location_message); + } while (0); + shm_unlink("/stressapptest"); + } - if (!use_hugepages_) { + if (!use_hugepages_ && !use_posix_shm_) { // Use memalign to ensure that blocks are aligned enough for disk direct IO. 
buf = static_cast(memalign(4096, length)); - if (buf) + if (buf) { logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); - else + } else { logprintf(0, "Process Error: memalign returned 0\n"); + if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { + logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " + "bit process. Please setup shared memory.\n"); + } + } } testmem_ = buf; - if (buf) { + if (buf || dynamic_mapped_shmem_) { testmemsize_ = length; } else { testmemsize_ = 0; } - return (buf != 0); + return (buf != 0) || dynamic_mapped_shmem_; } // Free the test memory. @@ -384,6 +526,11 @@ void OsLayer::FreeTestMem() { if (use_hugepages_) { shmdt(testmem_); shmctl(shmid_, IPC_RMID, NULL); + } else if (use_posix_shm_) { + if (!dynamic_mapped_shmem_) { + munmap(testmem_, testmemsize_); + } + close(shmid_); } else { free(testmem_); } @@ -396,11 +543,37 @@ void OsLayer::FreeTestMem() { // Prepare the target memory. It may requre mapping in, or this may be a noop. void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) { sat_assert((offset + length) <= testmemsize_); + if (dynamic_mapped_shmem_) { + // TODO(nsanders): Check if we can support MAP_NONBLOCK, + // and evaluate performance hit from not using it. + void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, + shmid_, offset); + if (mapping == MAP_FAILED) { + string errtxt = ErrorString(errno); + logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. " + "error: %s.\n", + offset, length, errtxt.c_str()); + sat_assert(0); + } + return mapping; + } + return reinterpret_cast(reinterpret_cast(testmem_) + offset); } // Release the test memory resources, if any. 
void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) { + if (dynamic_mapped_shmem_) { + int retval = munmap(addr, length); + if (retval == -1) { + string errtxt = ErrorString(errno); + logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. " + "error: %s.\n", + addr, length, errtxt.c_str()); + sat_assert(0); + } + } } // No error polling on unknown systems. @@ -453,7 +626,7 @@ uint32 OsLayer::PciRead(int fd, uint32 offset, int width) { logprintf(0, "Process Error: Can't seek %x\n", offset); return 0; } - if (read(fd, &datacast, size) != size) { + if (read(fd, &datacast, size) != static_cast(size)) { logprintf(0, "Process Error: Can't read %x\n", offset); return 0; } @@ -502,7 +675,7 @@ void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) { logprintf(0, "Process Error: Can't seek %x\n", offset); return; } - if (write(fd, &datacast, size) != size) { + if (write(fd, &datacast, size) != static_cast(size)) { logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset); return; } diff --git a/src/os.h b/src/os.h index 9ed04d5..28c8a2a 100644 --- a/src/os.h +++ b/src/os.h @@ -125,6 +125,8 @@ class OsLayer { asm volatile("mfence"); asm volatile("clflush (%0)" :: "r" (vaddr)); asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes." #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -152,6 +154,9 @@ class OsLayer { datacast_t data; __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); tsc = data.l64; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: your build may not function correctly" + tsc = 0; #else #warning "Unsupported CPU type: your build may not function correctly" tsc = 0; @@ -181,6 +186,8 @@ class OsLayer { // Returns 32 for 32-bit, 64 for 64-bit. virtual int AddressMode(); + // Update OsLayer state regarding cpu support for various features. 
+ virtual void GetFeatures(); // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file. virtual int PciOpen(int bus, int device, int function); @@ -217,12 +224,10 @@ class OsLayer { // Detect all PCI Devices. virtual PCIDevices GetPCIDevices(); - // Default platform dependent warm Adler memcpy to C implementation - // for compatibility. + // Disambiguate between different "warm" memcopies. virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, - AdlerChecksum *checksum) - {return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);} + AdlerChecksum *checksum); // Store a callback to use to print // app-specific info about the last error location. @@ -237,12 +242,14 @@ class OsLayer { protected: void *testmem_; // Location of test memory. - int64 testmemsize_; // Size of test memory. + uint64 testmemsize_; // Size of test memory. int64 totalmemsize_; // Size of available memory. int64 min_hugepages_bytes_; // Minimum hugepages size. bool error_injection_; // Do error injection? bool normal_mem_; // Memory DMA capable? bool use_hugepages_; // Use hugepage shmem? + bool use_posix_shm_; // Use 4k page shmem? + bool dynamic_mapped_shmem_; // Conserve virtual address space. int shmid_; // Handle to shmem int64 regionsize_; // Size of memory "regions" @@ -250,6 +257,10 @@ class OsLayer { int num_cpus_; // Number of cpus in the system. int num_nodes_; // Number of nodes in the system. int num_cpus_per_node_; // Number of cpus per node in the system. + int address_mode_; // Are we running 32 or 64 bit? + bool has_sse2_; // Do we have sse2 instructions? + bool has_clflush_; // Do we have clflush instructions? + time_t time_initialized_; // Start time of test. 
diff --git a/src/pattern.cc b/src/pattern.cc index 2fb552a..9f22674 100644 --- a/src/pattern.cc +++ b/src/pattern.cc @@ -393,7 +393,7 @@ int PatternList::Destroy() { // Return pattern numbered "i" Pattern *PatternList::GetPattern(int i) { - if (i < size_) { + if (static_cast(i) < size_) { return &patterns_[i]; } diff --git a/src/pattern.h b/src/pattern.h index b1168aa..181f839 100644 --- a/src/pattern.h +++ b/src/pattern.h @@ -102,7 +102,7 @@ class PatternList { private: vector patterns_; int weightcount_; // Total count of pattern weights. - int size_; + unsigned int size_; int initialized_; DISALLOW_COPY_AND_ASSIGN(PatternList); }; diff --git a/src/sat.cc b/src/sat.cc index 06b4c65..bed62b7 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -164,26 +164,6 @@ bool Sat::CheckEnvironment() { return false; } - if ((address_mode_ == 32) && - (os_->normal_mem()) && - (size_ >= 1499 * kMegabyte)) { - if (run_on_anything_) { - int64 new_size_mb = 1499; - logprintf(1, "Log: 32 bit binary: reducing from %lldMB to %lldMB\n", - size_mb_, - new_size_mb); - size_mb_ = new_size_mb; - size_ = size_mb_ * kMegabyte; - } else { - logprintf(0, "Process Error: %dMB test memory too large " - "for 32 bit binary.\n", - static_cast(size_ / kMegabyte)); - logprintf(0, "Log: Command line option '-A' bypasses this error.\n"); - bad_status(); - return false; - } - } - // If platform is 32 bit Xeon, floor memory size to multiple of 4. if (address_mode_ == 32) { size_mb_ = (size_mb_ / 4) * 4; @@ -350,7 +330,7 @@ void Sat::AddrMapUpdate(struct page_entry *pe) { for (int i = 0; i < page_length_; i += 4096) { uint64 paddr = os_->VirtualToPhysical(base + i); - int offset = paddr / 4096 / 8; + uint32 offset = paddr / 4096 / 8; unsigned char mask = 1 << ((paddr / 4096) % 8); if (offset >= arraysize) { @@ -969,7 +949,8 @@ bool Sat::ParseArgs(int argc, char **argv) { } // Set disk_pages_ if filesize or page size changed. 
- if (filesize != page_length_ * disk_pages_) { + if (filesize != static_cast(page_length_) * + static_cast(disk_pages_)) { disk_pages_ = filesize / page_length_; if (disk_pages_ == 0) disk_pages_ = 1; @@ -1014,7 +995,7 @@ void Sat::PrintHelp() { " --force_errors_like_crazy inject a lot of false errors " "to test error handling\n" " -F don't result check each transaction\n" - "--stop_on_errors Stop after finding the first error.\n" + " --stop_on_errors Stop after finding the first error.\n" " --read-block-size size of block for reading (-d)\n" " --write-block-size size of block for writing (-d). If not " "defined, the size of block for writing will be defined as the " @@ -1041,7 +1022,7 @@ void Sat::PrintHelp() { " --pause_duration duration (in seconds) of each pause\n" " --local_numa : choose memory regions associated with " "each CPU to be tested by that CPU\n" - "--remote_numa : choose memory regions not associated with " + " --remote_numa : choose memory regions not associated with " "each CPU to be tested by that CPU\n"); } @@ -1850,7 +1831,7 @@ bool Sat::Cleanup() { delete[] page_bitmap_; } - for (int i = 0; i < blocktables_.size(); i++) { + for (size_t i = 0; i < blocktables_.size(); i++) { delete blocktables_[i]; } diff --git a/src/sat.h b/src/sat.h index 950270f..b48f519 100644 --- a/src/sat.h +++ b/src/sat.h @@ -164,7 +164,7 @@ class Sat { bool error_injection_; // Simulate errors, for unittests. bool crazy_error_injection_; // Simulate lots of errors. - int64 max_errorcount_; // Number of errors before forced exit. + uint64 max_errorcount_; // Number of errors before forced exit. int run_on_anything_; // Ignore unknown machine ereor. int use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. 
diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 535bb34..b78857c 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -148,6 +148,9 @@ /* Define to 1 if strerror_r returns char *. */ #undef STRERROR_R_CHAR_P +/* Defined if the target CPU is armv7a */ +#undef STRESSAPPTEST_CPU_ARMV7A + /* Defined if the target CPU is i686 */ #undef STRESSAPPTEST_CPU_I686 diff --git a/src/worker.cc b/src/worker.cc index c568064..2fab28e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -86,6 +86,9 @@ namespace { int cpu; #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: unable to determine core ID." + cpu = 0; #else #warning "Unsupported CPU type: unable to determine core ID." cpu = 0; @@ -1953,7 +1956,6 @@ bool FileThread::Work() { } pages_copied_ = loops * sat_->disk_pages(); - status_ = result; // Clean up. CloseFile(fd); @@ -1961,7 +1963,10 @@ bool FileThread::Work() { logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n", thread_num_, status_, pages_copied_); - return result; + // Failure to read from device indicates hardware, + // rather than procedural SW error. + status_ = true; + return true; } bool NetworkThread::IsNetworkStopSet() { @@ -2259,7 +2264,7 @@ bool NetworkListenThread::ReapSlaves() { // Gather status and reap threads. logprintf(12, "Log: Joining all outstanding threads\n"); - for (int i = 0; i < child_workers_.size(); i++) { + for (size_t i = 0; i < child_workers_.size(); i++) { NetworkSlaveThread& child_thread = child_workers_[i]->thread; logprintf(12, "Log: Joining slave thread %d\n", i); child_thread.JoinThread(); @@ -2689,7 +2694,7 @@ bool DiskThread::GetDiskSize(int fd) { return false; } - // If an Elephant is initialized with status DEAD its size will be zero. + // Zero size indicates nonworking device.. 
if (block_size == 0) { os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1); ++errorcount_; @@ -2734,11 +2739,11 @@ int64 DiskThread::GetTime() { } // Do randomized reads and (possibly) writes on a device. -// Return false on fatal error, either SW or HW. +// Return false on fatal SW error, true on SW success, +// regardless of whether HW failed. bool DiskThread::DoWork(int fd) { int64 block_num = 0; int64 num_segments; - bool result = true; if (segment_size_ == -1) { num_segments = 1; @@ -2775,7 +2780,8 @@ bool DiskThread::DoWork(int fd) { non_destructive_ ? "(disabled) " : "", device_name_.c_str(), thread_num_); while (IsReadyToRunNoPause() && - in_flight_sectors_.size() < queue_size_ + 1) { + in_flight_sectors_.size() < + static_cast(queue_size_ + 1)) { // Confine testing to a particular segment of the disk. int64 segment = (block_num / blocks_per_segment_) % num_segments; if (!non_destructive_ && @@ -2810,7 +2816,7 @@ bool DiskThread::DoWork(int fd) { if (!non_destructive_) { if (!WriteBlockToDisk(fd, block)) { block_table_->RemoveBlock(block); - return false; + return true; } blocks_written_++; } @@ -2829,14 +2835,14 @@ bool DiskThread::DoWork(int fd) { BlockData *block = in_flight_sectors_.front(); in_flight_sectors_.pop(); if (!ValidateBlockOnDisk(fd, block)) - return false; + return true; block_table_->RemoveBlock(block); blocks_read_++; } } pages_copied_ = blocks_written_ + blocks_read_; - return result; + return true; } // Do an asynchronous disk I/O operation. @@ -2923,7 +2929,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // event.res contains the number of bytes written/read or // error if < 0, I think. - if (event.res != size) { + if (event.res != static_cast(size)) { errorcount_++; os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); -- 2.30.2