Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add standard and detailed timers for benchmarking purposes for NEST 2.20.2 #2086

Merged
merged 3 commits into from
Aug 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ option( with-ps-arrays "Use PS array construction semantics. [default=ON]" ON )
# add user modules
set( external-modules OFF CACHE STRING "External NEST modules to be linked in, separated by ';'. [default=OFF]" )

# use detailed NEST internal timers
set( with-detailed-timers OFF CACHE STRING "Enable detailed NEST internal time measurements. Detailed timers can affect the performance. [default=OFF]" )

# connect NEST with external projects
set( with-libneurosim OFF CACHE STRING "Request the use of libneurosim. Optionally give the directory, where libneurosim is installed. [default=OFF]" )
set( with-music OFF CACHE STRING "Request the use of MUSIC. Optionally give the directory, where MUSIC is installed. [default=OFF]" )
Expand Down Expand Up @@ -157,6 +160,7 @@ nest_process_with_gsl()
nest_process_with_python()
nest_process_with_openmp()
nest_process_with_mpi()
nest_process_with_detailed_timers()
nest_process_with_libneurosim()
nest_process_with_music()
nest_process_with_boost()
Expand Down
8 changes: 8 additions & 0 deletions cmake/ConfigureSummary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ function( NEST_PRINT_CONFIG_SUMMARY )
message( "Use MPI : No" )
endif ()

if ( TIMER_DETAILED )
message( "Detailed timers : Yes" )
message( "" )
else ()
message( "Detailed timers : No" )
message( "" )
endif ()

if ( HAVE_MUSIC )
message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" )
message( " Includes : ${MUSIC_INCLUDE_DIRS}" )
Expand Down
7 changes: 7 additions & 0 deletions cmake/ProcessOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,13 @@ function( NEST_PROCESS_WITH_MPI )
endif ()
endfunction()

function( NEST_PROCESS_WITH_DETAILED_TIMERS )
set( TIMER_DETAILED OFF PARENT_SCOPE )
if ( ${with-detailed-timers} STREQUAL "ON" )
set( TIMER_DETAILED ON PARENT_SCOPE )
endif ()
endfunction()

function( NEST_PROCESS_WITH_LIBNEUROSIM )
# Find libneurosim
set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE )
Expand Down
4 changes: 2 additions & 2 deletions doc/guides/HillTononiModels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@
" for DT, V in DT_V_seq:\n",
" nest.SetStatus(nrn, {'V_m': V, 'voltage_clamp': True})\n",
" nest.Simulate(DT)\n",
" t_end = nest.GetKernelStatus()['time']\n",
" t_end = nest.GetKernelStatus()['biological_time']\n",
" \n",
" # simulate a little more so we get all data up to t_end to multimeter\n",
" nest.Simulate(2 * nest.GetKernelStatus()['min_delay'])\n",
Expand Down Expand Up @@ -1477,7 +1477,7 @@
" for DT, V in DT_V_seq:\n",
" nest.SetStatus(nrn, {'V_m': V, 'voltage_clamp': True})\n",
" nest.Simulate(DT)\n",
" t_end = nest.GetKernelStatus()['time']\n",
" t_end = nest.GetKernelStatus()['biological_time']\n",
" \n",
" # simulate a little more so we get all data up to t_end to multimeter\n",
" nest.Simulate(2 * nest.GetKernelStatus()['min_delay'])\n",
Expand Down
4 changes: 2 additions & 2 deletions doc/model_details/HillTononiModels.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@
" for DT, V in DT_V_seq:\n",
" nest.SetStatus(nrn, {'V_m': V, 'voltage_clamp': True})\n",
" nest.Simulate(DT)\n",
" t_end = nest.GetKernelStatus()['time']\n",
" t_end = nest.GetKernelStatus()['biological_time']\n",
" \n",
" # simulate a little more so we get all data up to t_end to multimeter\n",
" nest.Simulate(2 * nest.GetKernelStatus()['min_delay'])\n",
Expand Down Expand Up @@ -1479,7 +1479,7 @@
" for DT, V in DT_V_seq:\n",
" nest.SetStatus(nrn, {'V_m': V, 'voltage_clamp': True})\n",
" nest.Simulate(DT)\n",
" t_end = nest.GetKernelStatus()['time']\n",
" t_end = nest.GetKernelStatus()['biological_time']\n",
" \n",
" # simulate a little more so we get all data up to t_end to multimeter\n",
" nest.Simulate(2 * nest.GetKernelStatus()['min_delay'])\n",
Expand Down
2 changes: 1 addition & 1 deletion doc/tutorials/topology/hill_tononi_Vp.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion doc/tutorials/topology/hill_tononi_Vp.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ def phaseInit(pos, lam, alpha):
extent=(0, Params['N'] + 1, 0, Params['N'] + 1),
vmin=vmn[sp - 1], vmax=vmx[sp - 1])
pylab.colorbar()
pylab.title(name + ', t = %6.1f ms' % nest.GetKernelStatus()['time'])
pylab.title(name + ', t = %6.1f ms' % nest.GetKernelStatus()['biological_time'])

pylab.draw() # force drawing inside loop
pylab.show() # required by ``pyreport``
Expand Down
2 changes: 1 addition & 1 deletion doc/tutorials/topology/hill_tononi_Vp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ functions, see documentation.
extent=(0, Params['N'] + 1, 0, Params['N'] + 1),
vmin=vmn[sp - 1], vmax=vmx[sp - 1])
pylab.colorbar()
pylab.title(name + ', t = %6.1f ms' % nest.GetKernelStatus()['time'])
pylab.title(name + ', t = %6.1f ms' % nest.GetKernelStatus()['biological_time'])

pylab.draw() # force drawing inside loop
pylab.show() # required by ``pyreport``
Expand Down
2 changes: 1 addition & 1 deletion examples/neuronview/neuronview.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def __init__(self):

def update_figure(self, spikes, potentials):

if nest.GetKernelStatus("time") != 0.0:
if nest.GetKernelStatus('biological_time') != 0.0:
self._figure.clear()

gs = gridspec.GridSpec(2, 1, height_ratios=[1, 4])
Expand Down
3 changes: 3 additions & 0 deletions libnestutil/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,7 @@
#define TARGET_BITS_SPLIT_STANDARD 0
#define TARGET_BITS_SPLIT_HPC 1

/* Whether to enable detailed NEST internal timers */
#cmakedefine TIMER_DETAILED 1

#endif // #ifndef CONFIG_H
4 changes: 4 additions & 0 deletions nestkernel/connection_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ nest::ConnectionManager::initialize()
// The following line is executed by all processes, no need to communicate
// this change in delays.
min_delay_ = max_delay_ = 1;

sw_construction_connect.reset();
}

void
Expand Down Expand Up @@ -180,6 +182,8 @@ nest::ConnectionManager::get_status( DictionaryDatum& dict )
def< long >( dict, names::num_connections, n );
def< bool >( dict, names::keep_source_table, keep_source_table_ );
def< bool >( dict, names::sort_connections_by_source, sort_connections_by_source_ );

def< double >( dict, names::time_construction_connect, sw_construction_connect.elapsed() );
}

DictionaryDatum
Expand Down
5 changes: 5 additions & 0 deletions nestkernel/connection_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

// Includes from libnestutil:
#include "manager_interface.h"
#include "stopwatch.h"

// Includes from nestkernel:
#include "conn_builder.h"
Expand Down Expand Up @@ -395,6 +396,10 @@ class ConnectionManager : public ManagerInterface

void set_stdp_eps( const double stdp_eps );

// public stop watch for benchmarking purposes
// start and stop in high-level connect functions in nestmodule.cpp and nest.cpp
Stopwatch sw_construction_connect;

private:
size_t get_num_target_data( const thread tid ) const;

Expand Down
77 changes: 68 additions & 9 deletions nestkernel/event_delivery_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ EventDeliveryManager::EventDeliveryManager()
, off_grid_spike_register_()
, send_buffer_secondary_events_()
, recv_buffer_secondary_events_()
, time_collocate_( 0.0 )
, time_communicate_( 0.0 )
, local_spike_counter_()
, send_buffer_spike_data_()
, recv_buffer_spike_data_()
Expand All @@ -80,7 +78,9 @@ EventDeliveryManager::initialize()

init_moduli();
local_spike_counter_.resize( num_threads, 0 );
reset_timers_counters();
reset_counters();
reset_timers_for_preparation();
reset_timers_for_dynamics();
spike_register_.resize( num_threads );
off_grid_spike_register_.resize( num_threads );
gather_completed_checker_.initialize( num_threads, false );
Expand Down Expand Up @@ -126,10 +126,15 @@ void
EventDeliveryManager::get_status( DictionaryDatum& dict )
{
def< bool >( dict, names::off_grid_spiking, off_grid_spiking_ );
def< double >( dict, names::time_collocate, time_collocate_ );
def< double >( dict, names::time_communicate, time_communicate_ );
def< unsigned long >(
dict, names::local_spike_counter, std::accumulate( local_spike_counter_.begin(), local_spike_counter_.end(), 0 ) );

#ifdef TIMER_DETAILED
def< double >( dict, names::time_collocate_spike_data, sw_collocate_spike_data_.elapsed() );
def< double >( dict, names::time_communicate_spike_data, sw_communicate_spike_data_.elapsed() );
def< double >( dict, names::time_deliver_spike_data, sw_deliver_spike_data_.elapsed() );
def< double >( dict, names::time_communicate_target_data, sw_communicate_target_data_.elapsed() );
#endif
}

void
Expand Down Expand Up @@ -256,17 +261,33 @@ EventDeliveryManager::update_moduli()
}

void
EventDeliveryManager::reset_timers_counters()
EventDeliveryManager::reset_counters()
{
time_collocate_ = 0.0;
time_communicate_ = 0.0;
for ( std::vector< unsigned long >::iterator it = local_spike_counter_.begin(); it != local_spike_counter_.end();
++it )
{
( *it ) = 0;
}
}

void
EventDeliveryManager::reset_timers_for_preparation()
{
#ifdef TIMER_DETAILED
sw_communicate_target_data_.reset();
#endif
}

void
EventDeliveryManager::reset_timers_for_dynamics()
{
#ifdef TIMER_DETAILED
sw_collocate_spike_data_.reset();
sw_communicate_spike_data_.reset();
sw_deliver_spike_data_.reset();
#endif
}

void
EventDeliveryManager::write_done_marker_secondary_events_( const bool done )
{
Expand Down Expand Up @@ -332,6 +353,12 @@ EventDeliveryManager::gather_spike_data_( const thread tid,
buffer_size_spike_data_has_changed_ = false;
}
} // of omp single; implicit barrier
#ifdef TIMER_DETAILED
if ( tid == 0 )
{
sw_collocate_spike_data_.start();
}
#endif

// Need to get new positions in case buffer size has changed
SendBufferPosition send_buffer_position(
Expand Down Expand Up @@ -364,6 +391,14 @@ EventDeliveryManager::gather_spike_data_( const thread tid,
#pragma omp barrier
}

#ifdef TIMER_DETAILED
if ( tid == 0 )
{
sw_collocate_spike_data_.stop();
sw_communicate_spike_data_.start();
}
#endif

// Communicate spikes using a single thread.
#pragma omp single
{
Expand All @@ -377,13 +412,29 @@ EventDeliveryManager::gather_spike_data_( const thread tid,
}
} // of omp single; implicit barrier

#ifdef TIMER_DETAILED
if ( tid == 0 )
{
sw_communicate_spike_data_.stop();
sw_deliver_spike_data_.start();
}
#endif

// Deliver spikes from receive buffer to ring buffers.
const bool deliver_completed = deliver_events_( tid, recv_buffer );
gather_completed_checker_[ tid ].logical_and( deliver_completed );

// Exit gather loop if all local threads and remote processes are
// done.
#pragma omp barrier

#ifdef TIMER_DETAILED
if ( tid == 0 )
{
sw_deliver_spike_data_.stop();
}
#endif

// Resize mpi buffers, if necessary and allowed.
if ( gather_completed_checker_.any_false() and kernel().mpi_manager.adaptive_spike_buffers() )
{
Expand Down Expand Up @@ -626,10 +677,18 @@ EventDeliveryManager::gather_target_data( const thread tid )
kernel().connection_manager.save_source_table_entry_point( tid );
#pragma omp barrier
kernel().connection_manager.clean_source_table( tid );

#pragma omp single
{
#ifdef TIMER_DETAILED
sw_communicate_target_data_.start();
#endif
kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ );
} // of omp single
#ifdef TIMER_DETAILED
sw_communicate_target_data_.stop();
#endif
} // of omp single (implicit barrier)


const bool distribute_completed = distribute_target_data_buffers_( tid );
gather_completed_checker_[ tid ].logical_and( distribute_completed );
Expand Down
36 changes: 21 additions & 15 deletions nestkernel/event_delivery_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,19 @@ class EventDeliveryManager : public ManagerInterface
void init_moduli();

/**
* Set cumulative time measurements for collocating buffers
* and for communication to zero; set local spike counter to zero.
* Set local spike counter to zero.
*/
virtual void reset_timers_counters();
virtual void reset_counters();

/**
* Set time measurements for internal profiling to zero (reg. prep.)
*/
virtual void reset_timers_for_preparation();

/**
* Set time measurements for internal profiling to zero (reg. sim. dyn.)
*/
virtual void reset_timers_for_dynamics();

private:
template < typename SpikeDataT >
Expand Down Expand Up @@ -409,18 +418,6 @@ class EventDeliveryManager : public ManagerInterface
std::vector< unsigned int > send_buffer_secondary_events_;
std::vector< unsigned int > recv_buffer_secondary_events_;

/**
* Time that was spent on collocation of MPI buffers during the last call to
* simulate.
*/
double time_collocate_;

/**
* Time that was spent on communication of events during the last call to
* simulate.
*/
double time_communicate_;

/**
* Number of generated spike events (both off- and on-grid) during the last
* call to simulate.
Expand All @@ -440,6 +437,15 @@ class EventDeliveryManager : public ManagerInterface
bool buffer_size_spike_data_has_changed_;

PerThreadBoolIndicator gather_completed_checker_;

#ifdef TIMER_DETAILED
// private stop watches for benchmarking purposes
// (intended for internal core developers, not for use in the public API)
Stopwatch sw_collocate_spike_data_;
Stopwatch sw_communicate_spike_data_;
Stopwatch sw_deliver_spike_data_;
Stopwatch sw_communicate_target_data_;
#endif
};

inline void
Expand Down
Loading