Skip to content

Commit

Permalink
- added tuning and graphics to cmake
Browse files Browse the repository at this point in the history
 - replaced RTDSC timers with a more standard one
 - made gnuplot generated plots available via tune_it.sh
 - moved timing points in tune.c to avoid elimination by
   optimization
  • Loading branch information
czurnieden committed Jul 1, 2024
1 parent 5809141 commit 6fa425e
Show file tree
Hide file tree
Showing 19 changed files with 385 additions and 129 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ tuning_list
etc/tune
2kprime.1
drprimes.txt
etc/multiplying*
etc/squaring*

# ignore stuff generated by "make manual" and "make poster"
*.aux
Expand Down Expand Up @@ -134,3 +136,20 @@ build*/
# kdevelop section
.kdev4/
*.kdev4

# ignore cmake files
CMakeFiles
Makefile
cmake_install.cmake












11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ include(sources.cmake)
# Options
#-----------------------------------------------------------------------------
option(BUILD_SHARED_LIBS "Build shared library and only the shared library if \"ON\", default is static" OFF)

option(BUILD_TUNING "Run a tuning program for the fast multiplication/squaring algorithms if \"ON\"" OFF)
option(BUILD_GRAPHS "Run a benchmark of the fast multiplication/squaring algorithms and make graphics if \"ON\"" OFF)
#-----------------------------------------------------------------------------
# Compose CFLAGS
#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -137,6 +138,14 @@ if(BUILD_TESTING)
add_subdirectory(demo)
endif()

#-----------------------------------------------------------------------------
# tuning and benchmark targets
#-----------------------------------------------------------------------------

# The etc/ subdirectory is only configured when tuning or graph generation was
# requested. The second argument of add_subdirectory() is the binary
# directory: it is set to the etc/ directory of the source tree, so everything
# built for this subdirectory is written into the source tree.
# NOTE(review): presumably tune_it.sh expects the "tune" executable next to
# itself in etc/ -- confirm before moving this to an out-of-source directory.
if(BUILD_TUNING OR BUILD_GRAPHS)
add_subdirectory(etc ${CMAKE_CURRENT_SOURCE_DIR}/etc)
endif()

#-----------------------------------------------------------------------------
# Install/export targets and files
#-----------------------------------------------------------------------------
Expand Down
72 changes: 41 additions & 31 deletions demo/timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,42 +55,35 @@ static unsigned int lbit(void)
}
}

/* RDTSC from Scott Duplichan */
static uint64_t TIMFUNC(void)
{
#if defined __GNUC__
#if defined(__i386__) || defined(__x86_64__)
/* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html
* the old code always got a warning issued by gcc, clang did not complain...
*/
unsigned hi, lo;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");

while (__builtin_expect((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");

return result;
#endif

/* Microsoft and Intel Windows compilers */
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#if defined(_WIN32)
# include <windows.h>
#endif
return __getReg(3116);

/*
 * Return a timestamp from the first clock source available at compile time:
 *
 *   - POSIX.1b (_POSIX_C_SOURCE >= 199309L): CLOCK_MONOTONIC in nanoseconds
 *   - Windows: raw QueryPerformanceCounter() ticks
 *   - otherwise: clock(), in units of CLOCKS_PER_SEC
 *
 * If the clock cannot be read, 0 is returned (same convention in the POSIX
 * and in the clock() branch).
 */
static uint64_t TIMFUNC(void)
{
#if _POSIX_C_SOURCE >= 199309L
/* Nanoseconds per second. Stays defined after this function, main() uses it, too. */
#define LTM_BILLION 1000000000
   struct timespec ts;

   /* clock_gettime() returns non-zero on failure and leaves "ts" untouched. */
   if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
      return (uint64_t)(0);
   }
   return (((uint64_t)ts.tv_sec) * LTM_BILLION + (uint64_t)ts.tv_nsec);
#elif defined(_WIN32)
   LARGE_INTEGER ticks;
   QueryPerformanceCounter(&ticks);
   return (uint64_t)ticks.QuadPart;
#else
   /* Portable ISO C fallback; no "#error" here, this branch must compile. */
   clock_t t;
   t = clock();
   /* clock() returns (clock_t)(-1) if the processor time is not available. */
   if (t < (clock_t)(0)) {
      return (uint64_t)(0);
   }
   return (uint64_t)(t);
#endif
}


#define DO2(x) do { mp_err err = x; err = x; (void)err; }while(0)
#define DO4(x) DO2(x); DO2(x)
#define DO8(x) DO4(x); DO4(x)
Expand Down Expand Up @@ -141,6 +134,12 @@ int main(int argc, char **argv)
int n, cnt, ix, old_kara_m, old_kara_s, old_toom_m, old_toom_s;
unsigned rr;

#ifdef _WIN32
LARGE_INTEGER Frequency;
#else
struct timespec ts;
#endif

CHECK_OK(mp_init(&a));
CHECK_OK(mp_init(&b));
CHECK_OK(mp_init(&c));
Expand All @@ -150,10 +149,21 @@ int main(int argc, char **argv)

srand(LTM_TIMING_RAND_SEED);


#ifdef _WIN32
QueryPerformanceFrequency(&Frequency);
CLK_PER_SEC = (uint64) Frequency;
#elif _POSIX_C_SOURCE >= 199309L
/* returns -1 for an error and 0 for okay, sets errno (not used here) */
if (clock_getres(CLOCK_MONOTONIC, &ts)) {
fprintf(stderr, "%d, clock_getres failed\n", __LINE__);
exit(EXIT_FAILURE);
}
CLK_PER_SEC = LTM_BILLION / ts.tv_nsec;
#else
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
#endif

printf("CLK_PER_SEC == %" PRIu64 "\n", CLK_PER_SEC);

Expand Down
76 changes: 73 additions & 3 deletions doc/bn.tex
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ \section{License}

\section{Building LibTomMath}


LibTomMath is meant to be very ``GCC friendly'' as it comes with a makefile well suited for GCC.
However, the library will also build in MSVC, Borland C out of the box. For any other ISO C
compiler a makefile will have to be made by the end
Expand Down Expand Up @@ -270,6 +271,53 @@ \subsection{Testing}
test was invoked. If an error is detected the program will exit with a dump of the relevant
numbers it was working with.

\subsection{CMake}
Some of the options above are also available with CMake.

\subsubsection{Shared Library}
The default is a static library. To produce a shared library use the CMake option
\begin{alltt}
-DBUILD_SHARED_LIBS=ON
\end{alltt}

\subsubsection{Testing}
To run the testsuite use option
\begin{alltt}
-DBUILD_TESTING=ON
\end{alltt}

\subsubsection{Tuning}
To run the tuning itself use option
\begin{alltt}
-DBUILD_TUNING=ON
\end{alltt}

To run a benchmark with the tuned library and print plots of the benchmark tables use option
\begin{alltt}
-DBUILD_GRAPHS=ON
\end{alltt}

To compile with LTO (Link Time Optimization) use option
\begin{alltt}
-DCOMPILE_LTO=ON
\end{alltt}

There are several build types available:
\begin{description}
\item[Debug] Build a library with debugging symbols (\texttt{-g3}) and no extra optimization
\item[Release] Build the normal release version (\texttt{-O3 -funroll-loops -fomit-frame-pointer}) (default)
\item[RelWithDebInfo] Build a library with debugging symbols (\texttt{-g3 -O2}) and a bit of optimization
\item[MinSizeRel] Build a small sized library (\texttt{-Os})
\end{description}
The build types are case-sensitive!

Choose one with:
\begin{alltt}
-DCMAKE_BUILD_TYPE=buildtype
\end{alltt}



\section{Build Configuration}
LibTomMath can be configured at build time in two phases we shall call ``depends'' and
``trims''. Each phase changes how the library is built and they are applied one after another
Expand Down Expand Up @@ -1600,13 +1648,35 @@ \section{Tuning Polynomial Basis Routines}
make tune
\end{alltt}

This will run a benchmark, computes the medians, rewrites \texttt{bncore.c}, and recompiles
\texttt{bncore.c} and relinks the library.
With CMake
\begin{alltt}
cd /path/to/build/dir
cmake -DBUILD_TUNING=ON /path/to/source/dir
cmake --build .
\end{alltt}


This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
\texttt{cutoffs.c}, and relink the library.

The benchmark itself can be fine--tuned in the file \texttt{etc/tune\_it.sh}.

The program \texttt{etc/tune} is also able to print a list of values for printing curves with e.g.:
\texttt{gnuplot}. type \texttt{./etc/tune -h} to get a list of all available options.
\texttt{gnuplot}. Type \texttt{./etc/tune -h} to get a list of all the available options. There
are a lot.

To get some nice plots in \texttt{etc} try

\begin{alltt}
make graphs
\end{alltt}

With CMake
\begin{alltt}
cd /path/to/build/dir
cmake -DBUILD_GRAPHS=ON /path/to/source/dir
cmake --build .
\end{alltt}

This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
\texttt{cutoffs.c}, relink the library, and run gnuplot to print plots in the PNG format. The size
of the images is fixed in the file \texttt{etc/plot\_graphs.gp} and has to be changed manually.

\chapter{Modular Reduction}

Expand Down
51 changes: 51 additions & 0 deletions etc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: Unlicense
#
# LibTomMath, a free open source portable number theoretic multiple-precision
# integer (MPI) library written entirely in C.
#
# Builds the "tune" executable from tune.c and runs tune_it.sh after every
# (re-)build of it. With BUILD_GRAPHS=ON the script is called with the
# argument 1000, otherwise without arguments.
#

# target_link_options() (used below) was introduced with CMake 3.13
cmake_minimum_required(VERSION 3.13)

set(LTM_TUNE tune-ltm)

# This file can be included from the top level or used stand-alone
if(PROJECT_NAME)
  set(LIBRARY_NAME ${PROJECT_NAME})
else()
  # Define an independent project and all the necessary stuff around
  project(${LTM_TUNE}
    LANGUAGES C)
  set(LIBRARY_NAME libtommath)
  # Nothing can be linked without the library, fail at configure time
  find_package(${LIBRARY_NAME} REQUIRED)
  if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE "Release")
  endif()
endif()

add_executable(tune
  ${CMAKE_CURRENT_SOURCE_DIR}/tune.c
)

target_include_directories(tune PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/..
)

target_link_libraries(tune PRIVATE
  ${LIBRARY_NAME}
)

target_compile_options(tune PRIVATE
  ${LTM_C_FLAGS}
)
# An executable has no consumers, its link options are PRIVATE
target_link_options(tune BEFORE PRIVATE
  ${LTM_LD_FLAGS}
)

# Arguments handed over to tune_it.sh, none by default
set(LTM_TUNE_IT_ARGS "")

if(BUILD_GRAPHS)
  # used in tune_it.sh
  find_program(GNUPLOT gnuplot)
  if(NOT GNUPLOT)
    message(WARNING "BUILD_GRAPHS is ON but gnuplot was not found, the plots can not be generated")
  endif()
  # tune_it.sh is called with the argument 1000 when graphs are requested
  set(LTM_TUNE_IT_ARGS 1000)
endif()

# Run the tuning script in etc/ each time "tune" has been (re-)built
add_custom_command(
  TARGET tune POST_BUILD
  COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tune_it.sh ${LTM_TUNE_IT_ARGS}
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
  VERBATIM
)
9 changes: 7 additions & 2 deletions etc/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ LTM_TUNE_CFLAGS = $(CFLAGS) $(LTM_CFLAGS) -Wall -W -Wextra -Wshadow -O3 -I../
# libname when you can't install the lib with install
LIBNAME=../libtommath.a

all: pprime tune test_standalone mersenne drprime 2kprime mont
all: pprime tune test_standalone mersenne drprime 2kprime mont getlimbsize graph

#provable primes
pprime: pprime.o
Expand Down Expand Up @@ -36,10 +36,15 @@ drprime: drprime.o
mont: mont.o
$(CC) $(LTM_TUNE_CFLAGS) mont.o $(LIBNAME) -o mont

# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
# "tune" runs twice because it also runs automatically when it is built.
graphs: tune
./tune_it.sh 1000

clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat \
tuning_list multiplying squaring test *.da *.dyn *.dpi *~
tuning_list get_limbsize out *.da *.dyn *.dpi *~ cmake_install.cmake Makefile
rm -rf .libs
rm -rf CMakeFiles

.PHONY: tune
20 changes: 5 additions & 15 deletions etc/makefile.icc
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,10 @@ tune: tune.o
$(CC) $(CFLAGS) tune.o $(LIBNAME) -o tune
./tune_it.sh

# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
tune86: tune.c
nasm -f coff timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86

# for cygwin
tune86c: tune.c
nasm -f gnuwin32 timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86

#make tune86 for linux or any ELF format
tune86l: tune.c
nasm -f elf -DUSE_ELF timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
# "tune" runs twice because it also runs automatically when it is built.
graphs: tune
./tune_it.sh 1000

# spits out mersenne primes
mersenne: mersenne.o
Expand All @@ -64,4 +54,4 @@ mont: mont.o


clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il tuning_list
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat get_limbsize *.il tuning_list
19 changes: 19 additions & 0 deletions etc/plot_graphs.gp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Plots the files "multiplying<ARG1>" and "squaring<ARG1>" into PNG images of
# the same name with the suffix ".png". ARG1 is used as the limb size in bits
# in the file names and in the titles.
# NOTE(review): ARG1 is presumably the first argument of "gnuplot -c" given
# by the calling script -- confirm against the caller.

# Fixed image size in pixels
set term pngcairo size 720,540
# Color sequence that works well for most forms of color blindness
set colorsequence podo

set key top left;

set ylabel "Time"
set xlabel "Operand size (limbs)"

# Multiplication: column 1 is the x-axis, column 2 "slow", column 3 "fast"
set output "multiplying".ARG1.".png";
set title "Comparing fast and slow multiplying [".ARG1." bits limbsize]";
plot "multiplying".ARG1."" using 1:2 w lines t "slow", "multiplying".ARG1."" using 1:3 w lines t "fast"

# Squaring: same column layout as above
set output "squaring".ARG1.".png";
set title "Comparing fast and slow squaring [".ARG1." bits limbsize]";
plot "squaring".ARG1."" using 1:2 w lines t "slow", "squaring".ARG1."" using 1:3 w lines t "fast"



Loading

0 comments on commit 6fa425e

Please sign in to comment.