Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
benchmark program
  • Loading branch information
blazek committed Nov 20, 2011
1 parent f0f8a12 commit 555e028
Show file tree
Hide file tree
Showing 6 changed files with 1,027 additions and 0 deletions.
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -1,3 +1,4 @@
# The test sub-directories are only configured when ENABLE_TESTS is set.
if (ENABLE_TESTS)
  add_subdirectory (src)
  # Benchmark program (qgis_bench).
  add_subdirectory (bench)
endif ()
64 changes: 64 additions & 0 deletions tests/bench/CMakeLists.txt
@@ -0,0 +1,64 @@
########################################################
# Files

# Source files compiled directly into the benchmark executable.
set (BENCH_SRCS
  main.cpp
  qgsbench.cpp
)

# Headers that are run through Qt's moc to produce additional sources.
set (BENCH_MOC_HDRS
  qgsbench.h
)

########################################################
# Build

# Generate moc sources for the headers listed in BENCH_MOC_HDRS;
# the resulting file names are stored in BENCH_MOC_SRCS.
qt4_wrap_cpp (BENCH_MOC_SRCS ${BENCH_MOC_HDRS})

# The benchmark executable: built as an application bundle on macOS
# (MACOSX_BUNDLE) and as a Windows GUI executable (WIN32).
add_executable (qgis_bench
  MACOSX_BUNDLE
  WIN32
  ${BENCH_SRCS}
  ${BENCH_MOC_SRCS}
)

# Header search path: this directory, the QGIS core sources (including the
# raster sub-directory) and the binary directory holding generated files.
include_directories (
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../../src/core
  ${CMAKE_CURRENT_SOURCE_DIR}/../../src/core/raster
  ${CMAKE_CURRENT_BINARY_DIR}
  # ${GDAL_INCLUDE_DIR} # remove once raster layer is cleaned up
)

# SQLite headers come either from the bundled spatialite copy or from the
# SQLite3 installation found on the system.
if (WITH_INTERNAL_SPATIALITE)
  include_directories (../../src/core/spatialite/headers/spatialite)
else ()
  include_directories (${SQLITE3_INCLUDE_DIR})
endif ()

# Link order is kept as: QGIS core, external SQLite (if used), then Qt.
target_link_libraries (qgis_bench
  qgis_core
)

# The external SQLite library is only needed when the internal spatialite
# copy is not used.
if (NOT WITH_INTERNAL_SPATIALITE)
  target_link_libraries (qgis_bench
    ${SQLITE3_LIBRARY}
  )
endif ()

# Qt4 modules the benchmark is linked against.
target_link_libraries (qgis_bench
  ${QT_QTCORE_LIBRARY}
  ${QT_QTNETWORK_LIBRARY}
  ${QT_QTSVG_LIBRARY}
  ${QT_QTXML_LIBRARY}
  ${QT_QTWEBKIT_LIBRARY}
  ${QT_QTMAIN_LIBRARY}
  ${QT_QTTEST_LIBRARY}
)

# Run-time search path of the installed binary: the QGIS library directory
# under the install prefix, plus the directories of the linked libraries.
set_target_properties (qgis_bench
  PROPERTIES
    INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/${QGIS_LIB_DIR}
    INSTALL_RPATH_USE_LINK_PATH true
)

########################################################
# Install

# Install the benchmark executable (or its .app bundle on macOS) into the
# QGIS binary directory.
INSTALL (TARGETS qgis_bench
  BUNDLE DESTINATION ${QGIS_BIN_DIR}
  RUNTIME DESTINATION ${QGIS_BIN_DIR}
)
IF (APPLE)
  # Link the bundle's Contents/Frameworks to the shared framework directory.
  # DESTDIR is escaped (\$ENV{DESTDIR}) so that it is read when the install
  # script runs; unescaped it would be expanded at configure time and a
  # staged install (make DESTDIR=... install) would create the link outside
  # the staging root. The remaining variables are intentionally expanded at
  # configure time.
  INSTALL (CODE "EXECUTE_PROCESS(COMMAND ln -sfh ../../../${QGIS_FW_SUBDIR} \"\$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/${QGIS_BIN_DIR}/qgis_bench.app/Contents/Frameworks\")")
ENDIF (APPLE)

78 changes: 78 additions & 0 deletions tests/bench/README
@@ -0,0 +1,78 @@
Time measurement
------------------

For usable benchmarking we need a precise, reliable and repeatable time measurement. That sounds easy – we are on a computer, right? Unfortunately, I found it almost impossible! Hopefully I am totally wrong.

Several "kinds of time" exist: real (wall clock) time, user CPU time and system CPU time. What I believe we have to use is the task's user + system CPU time.

There are various commands/functions which can be used to measure time, e.g.:

1) time command (real, user, sys time):

time COMMAND

2) getrusage function (user, sys time):

#include <sys/time.h>
#include <sys/resource.h>
struct rusage start, end;
getrusage( RUSAGE_SELF, &start);
// measured code
getrusage( RUSAGE_SELF, &end);
double user_elapsed = end.ru_utime.tv_sec + end.ru_utime.tv_usec/1000000. - start.ru_utime.tv_sec - start.ru_utime.tv_usec/1000000.;
double sys_elapsed = end.ru_stime.tv_sec + end.ru_stime.tv_usec/1000000. - start.ru_stime.tv_sec - start.ru_stime.tv_usec/1000000.;


3) times function (user, sys time):

#include <sys/times.h>
struct tms start, end;
times(&start);
// measured code
times(&end);
double user_elapsed = ((double)(end.tms_utime - start.tms_utime))/sysconf(_SC_CLK_TCK);
double sys_elapsed = ((double)(end.tms_stime - start.tms_stime))/sysconf(_SC_CLK_TCK);

4) clock function (user + sys time ?):

#include <time.h>
clock_t start = clock();
// measured code
clock_t end = clock();
double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;

5) clock_gettime functions (user + sys time ?):

#include <time.h>
struct timespec start, end;
clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &start );
// measured code
clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &end );
double elapsed = end.tv_sec + end.tv_nsec / 1000000000. - start.tv_sec - start.tv_nsec / 1000000000.;

6) QTime class (QElapsedTimer Qt >= 4.7) (real time):

QTime time;
time.start();
// measured code
double elapsed = time.elapsed() / 1000.;

I tried all of them and all are giving the same, mostly useless, values. If the same piece of code is measured several times, the results differ. It seems that all those functions read user time from the same place, the kernel's task_struct.utime. The problem is, if I understand correctly, how task_struct.utime is updated. Whenever a timer interrupt arrives at the scheduler (every 1/HZ), it calls update_process_times->account_process_tick->irqtime_account_process_tick->account_user_time and increases task_struct.utime by cputime_one_jiffy (1/HZ). See kernel/sched.c and kernel/timer.c. It means that utime is not increased by the pure time during which the task code is running, but by a fixed interval which includes some task switching overhead??? In a simple test, I could observe a 30% increase of utime if another application was running at the same time.

Unfortunately I don't see anything better than using user time + sys time, running the test with the highest priority and avoiding any other use of the computer while the tests are running, e.g.:

sudo chrt -f 99 COMMAND

To be sure that the measured values are correct, it is necessary to run several cycles and check the standard deviation or a similar measure.

There is also high-level benchmark support available in QTestLib: it is possible to use the QBENCHMARK macro + QTEST_MAIN to easily create a test executable. Such a test may be run with various options; some notes on modes/options:

-tickcounter - reads the rdtsc register (on Linux), thus it counts real time; the result is not constant

-callgrind - reruns the command with callgrind. The number of 'instr. loads' is constant for a constant number of iterations; the number of instructions per iteration decreases with the number of iterations (for small numbers of iterations). For a simple function the number of instructions of the second iteration may be 40% of the first one - cache, prediction??? Callgrind is really very slow. I am not sure what 'instr. loads' exactly means and whether it can somehow be converted to time, but I don't believe so. AFAIK each instruction needs a different number of cycles, and it may be different even for the same instruction because of the CPU cache; then there are instruction pipelines etc.


Build options
-------------

CMAKE_BUILD_TYPE should be RelWithDebInfo so that it compiles with optimisations but also adds debug information so that it can be profiled with callgrind and visualized with kcachegrind.

0 comments on commit 555e028

Please sign in to comment.