Skip to content

Commit

Permalink
LibTorch support is now added to the build system.
Browse files Browse the repository at this point in the history
During configuration, provide -DTORCH=ON for the libraries to be downloaded and configured automatically. This does not use ExternalProject (which FFTW, FLTK and TBB use): because LibTorch itself needs to be configured, its libraries are required at configure time. To support old CMake versions (< 3.11), I've implemented the fetch and configure steps myself using calls to Linux commands, so this is not cross-platform.
Additionally, all instances of the build-time definition of CUDA had to be replaced by _CUDA_ENABLED to avoid conflicts with LibTorch. This was originally a poor choice (made by those damn PhD students) and had to be fixed at some point anyway.
  • Loading branch information
dkimanius committed Jun 10, 2020
1 parent d470c8e commit f453d2c
Show file tree
Hide file tree
Showing 29 changed files with 235 additions and 195 deletions.
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ if(CUDA_FOUND)
endif()

if(CUDA)
add_definitions(-DCUDA)
add_definitions(-D_CUDA_ENABLED)
endif()
else(CUDA_FOUND)
message(STATUS "Using non-cuda compilation....")
Expand Down Expand Up @@ -355,6 +355,16 @@ if(PNG_FOUND)
add_definitions(-DHAVE_PNG)
endif()


# -----------------------------------------------------------------------------Torch--

# Optional Torch support; the TORCH option defaults to OFF and is enabled with -DTORCH=ON.
option(TORCH "Enable support for Torch" OFF)
if(TORCH)
message(STATUS "Torch support requested by user.")
# Pull in the project's Torch setup script from the cmake/ directory of the source tree.
include(${CMAKE_SOURCE_DIR}/cmake/BuildTorch.cmake)
# Expose the feature to the sources through the _TORCH_ENABLED compile definition.
add_definitions(-D_TORCH_ENABLED)
endif(TORCH)

# ----------------------------------------------------------------------COPY SCRIPTS--

if(FORCE_OWN_FFTW)
Expand Down
12 changes: 6 additions & 6 deletions src/acc/acc_backprojector.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#ifndef ACC_BACKPROJECTOR_H_
#define ACC_BACKPROJECTOR_H_

#ifdef CUDA
#ifdef _CUDA_ENABLED
# include <cuda_runtime.h>
#endif
#include "src/complex.h"
#include "src/acc/settings.h"
#include "src/acc/acc_ptr.h"

#ifndef CUDA
#ifndef _CUDA_ENABLED
# include <tbb/spin_mutex.h>
#endif

Expand All @@ -22,8 +22,8 @@ class AccBackprojector
XFLOAT padding_factor;
size_t mdlXYZ;

#ifndef CUDA
tbb::spin_mutex *mutexes;
#ifndef _CUDA_ENABLED
tbb::spin_mutex *mutexes;
#endif

size_t allocaton_size;
Expand All @@ -43,8 +43,8 @@ class AccBackprojector
allocaton_size(0), voxelCount(0),
d_mdlReal(NULL), d_mdlImag(NULL), d_mdlWeight(NULL),
stream(0)
#ifndef CUDA
, mutexes(0)
#ifndef _CUDA_ENABLED
, mutexes(0)
#endif
{}

Expand Down
10 changes: 5 additions & 5 deletions src/acc/acc_backprojector_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ size_t AccBackprojector::setMdlDim(
padding_factor = paddingFactor;

//Allocate space for model
#ifdef CUDA
#ifdef _CUDA_ENABLED
HANDLE_ERROR(cudaMalloc( (void**) &d_mdlReal, mdlXYZ * sizeof(XFLOAT)));
HANDLE_ERROR(cudaMalloc( (void**) &d_mdlImag, mdlXYZ * sizeof(XFLOAT)));
HANDLE_ERROR(cudaMalloc( (void**) &d_mdlWeight, mdlXYZ * sizeof(XFLOAT)));
Expand Down Expand Up @@ -67,7 +67,7 @@ void AccBackprojector::initMdl()
#endif

//Initiate model with zeros
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaMemset( d_mdlReal, 0, mdlXYZ * sizeof(XFLOAT)));
DEBUG_HANDLE_ERROR(cudaMemset( d_mdlImag, 0, mdlXYZ * sizeof(XFLOAT)));
DEBUG_HANDLE_ERROR(cudaMemset( d_mdlWeight, 0, mdlXYZ * sizeof(XFLOAT)));
Expand All @@ -83,7 +83,7 @@ void AccBackprojector::initMdl()

void AccBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w)
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Make sure to wait for remaining kernel executions

DEBUG_HANDLE_ERROR(cudaMemcpyAsync( r, d_mdlReal, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream));
Expand All @@ -100,7 +100,7 @@ void AccBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w)

void AccBackprojector::getMdlDataPtrs(XFLOAT *& r, XFLOAT *& i, XFLOAT *& w)
{
#ifndef CUDA
#ifndef _CUDA_ENABLED
r = d_mdlReal;
i = d_mdlImag;
w = d_mdlWeight;
Expand All @@ -122,7 +122,7 @@ void AccBackprojector::clear()

if (d_mdlReal != NULL)
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaFree(d_mdlReal));
DEBUG_HANDLE_ERROR(cudaFree(d_mdlImag));
DEBUG_HANDLE_ERROR(cudaFree(d_mdlWeight));
Expand Down
6 changes: 3 additions & 3 deletions src/acc/acc_helper_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void mapAllWeightsToMweights(
template< typename T>
void arrayOverThreshold(AccPtr<T> &data, AccPtr<bool> &passed, T threshold)
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
int grid_size = ceil((float)data.getSize()/(float)OVER_THRESHOLD_BLOCK_SIZE);
cuda_kernel_array_over_threshold<T><<< grid_size, OVER_THRESHOLD_BLOCK_SIZE, 0, data.getStream() >>>(
~data,
Expand Down Expand Up @@ -180,7 +180,7 @@ size_t findThresholdIdxInCumulativeSum(AccPtr<T> &data, T threshold)
}
else
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
AccPtr<size_t > idx(1, data.getStream(), data.getAllocator());
idx[0] = 0;

Expand Down Expand Up @@ -350,7 +350,7 @@ void runCenterFFT(MultidimArray< T >& v, bool forward, CudaCustomAllocator *allo

int dim=ceilf((float)(v.nzyxdim/(float)(2*CFTT_BLOCK_SIZE)));
AccUtilities::centerFFT_2D(dim, 0, CFTT_BLOCK_SIZE,
#ifdef CUDA
#ifdef _CUDA_ENABLED
~img_in,
#else
&img_in[0],
Expand Down
22 changes: 11 additions & 11 deletions src/acc/acc_helper_functions_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ void runBackProjectKernel(

if(BP.mdlZ==1)
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
if(ctf_premultiplied)
cuda_kernel_backproject2D<true><<<imageCount,BP_2D_BLOCK_SIZE,0,optStream>>>(
d_img_real, d_img_imag,
Expand Down Expand Up @@ -689,7 +689,7 @@ void runBackProjectKernel(
if(do_sgd)
{
if(data_is_3D)
#ifdef CUDA
#ifdef _CUDA_ENABLED
if(ctf_premultiplied)
cuda_kernel_backprojectSGD<true, true><<<imageCount,BP_DATA3D_BLOCK_SIZE,0,optStream>>>(
projector, d_img_real, d_img_imag,
Expand Down Expand Up @@ -734,7 +734,7 @@ void runBackProjectKernel(

#endif
else
#ifdef CUDA
#ifdef _CUDA_ENABLED
if(ctf_premultiplied)
cuda_kernel_backprojectSGD<false, true><<<imageCount,BP_REF3D_BLOCK_SIZE,0,optStream>>>(
projector, d_img_real, d_img_imag,
Expand Down Expand Up @@ -782,7 +782,7 @@ void runBackProjectKernel(
else
{
if(data_is_3D)
#ifdef CUDA
#ifdef _CUDA_ENABLED
if(ctf_premultiplied)
cuda_kernel_backproject3D<true, true><<<imageCount,BP_DATA3D_BLOCK_SIZE,0,optStream>>>(
d_img_real, d_img_imag,
Expand Down Expand Up @@ -828,7 +828,7 @@ void runBackProjectKernel(

#endif
else
#ifdef CUDA
#ifdef _CUDA_ENABLED
if(ctf_premultiplied)
cuda_kernel_backproject3D<false, true><<<imageCount,BP_REF3D_BLOCK_SIZE,0,optStream>>>(
d_img_real, d_img_imag,
Expand Down Expand Up @@ -914,7 +914,7 @@ void mapAllWeightsToMweights(
{
size_t combinations = orientation_num*translation_num;
int grid_size = ceil((float)(combinations)/(float)WEIGHT_MAP_BLOCK_SIZE);
#ifdef CUDA
#ifdef _CUDA_ENABLED
cuda_kernel_allweights_to_mweights<<< grid_size, WEIGHT_MAP_BLOCK_SIZE, 0, stream >>>(
d_iorient,
d_allweights,
Expand Down Expand Up @@ -1630,7 +1630,7 @@ void runCollect2jobs( int grid_dim,
)
{
if (data_is_3D) {
#ifdef CUDA
#ifdef _CUDA_ENABLED
dim3 numblocks(grid_dim);
size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*5; // x+y+z+myp+weights
cuda_kernel_collect2jobs<true><<<numblocks,SUMW_BLOCK_SIZE,shared_buffer>>>(
Expand Down Expand Up @@ -1682,7 +1682,7 @@ void runCollect2jobs( int grid_dim,
}
else
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
dim3 numblocks(grid_dim);
size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*4; // x+y+myp+weights
cuda_kernel_collect2jobs<false><<<numblocks,SUMW_BLOCK_SIZE,shared_buffer>>>(
Expand Down Expand Up @@ -1804,7 +1804,7 @@ void windowFourierTransform2(
if(oX==iX)
{
HANDLE_ERROR(cudaStreamSynchronize(d_in.getStream()));
#ifdef CUDA
#ifdef _CUDA_ENABLED
cudaCpyDeviceToDevice(&d_in(pos), ~d_out, oX*oY*oZ*Npsi, d_out.getStream() );
#else
memcpy(&d_out[0], &d_in[0], oX*oY*oZ*Npsi*sizeof(ACCCOMPLEX));
Expand All @@ -1816,7 +1816,7 @@ void windowFourierTransform2(
{
long int max_r2 = (iX - 1) * (iX - 1);

#ifdef CUDA
#ifdef _CUDA_ENABLED
dim3 grid_dim(ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi);
cuda_kernel_window_fourier_transform<true><<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>(
&d_in(pos),
Expand All @@ -1843,7 +1843,7 @@ void windowFourierTransform2(
}
else
{
#ifdef CUDA
#ifdef _CUDA_ENABLED
dim3 grid_dim(ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi);
cuda_kernel_window_fourier_transform<false><<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>(
&d_in(pos),
Expand Down
2 changes: 1 addition & 1 deletion src/acc/acc_ml_optimiser_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1672,7 +1672,7 @@ void convertAllSquaredDifferencesToWeights(unsigned exp_ipass,
DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread));

//Set all device-located weights to zero, and only the smallest one to 1.
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaMemsetAsync(~(PassWeights[img_id].weights), 0.f, PassWeights[img_id].weights.getSize()*sizeof(XFLOAT),0));

XFLOAT unity=1;
Expand Down
10 changes: 5 additions & 5 deletions src/acc/acc_projector.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "src/acc/acc_ptr.h"
//#include <cuda_runtime.h>
//#include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh"
#ifndef CUDA
#ifndef _CUDA_ENABLED
#include <complex>
#endif

Expand All @@ -30,7 +30,7 @@ class AccProjector

size_t pitch2D;
#else
#ifdef CUDA
#ifdef _CUDA_ENABLED
XFLOAT *mdlReal, *mdlImag;
#else
std::complex<XFLOAT> *mdlComplex;
Expand All @@ -56,7 +56,7 @@ class AccProjector
mdlImag = 0;
pitch2D = 0;
#else
#ifdef CUDA
#ifdef _CUDA_ENABLED
mdlReal = 0;
mdlImag = 0;
#else
Expand All @@ -73,8 +73,8 @@ class AccProjector

void initMdl(XFLOAT *real, XFLOAT *imag);
void initMdl(Complex *data);
#ifndef CUDA
void initMdl(std::complex<XFLOAT> *data);
#ifndef _CUDA_ENABLED
void initMdl(std::complex<XFLOAT> *data);
#endif

void clear();
Expand Down
10 changes: 5 additions & 5 deletions src/acc/acc_projector_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ bool AccProjector::setMdlDim(
HANDLE_ERROR(cudaCreateTextureObject(mdlImag, &resDesc_imag, &texDesc, NULL));

#else
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlReal, mdlXYZ * sizeof(XFLOAT)));
DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlImag, mdlXYZ * sizeof(XFLOAT)));
#else
Expand All @@ -120,7 +120,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
printf("DEBUG_ERROR: Model dimensions must be set with setMdlDim before call to setMdlData.");
CRITICAL(ERR_MDLDIM);
}
#ifdef CUDA
#ifdef _CUDA_ENABLED
if (mdlReal == NULL)
{
printf("DEBUG_ERROR: initMdl called before call to setMdlData.");
Expand Down Expand Up @@ -157,7 +157,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayImag2D, pitch2D, imag, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, cudaMemcpyHostToDevice));
}
#else
#ifdef CUDA
#ifdef _CUDA_ENABLED
DEBUG_HANDLE_ERROR(cudaMemcpy( mdlReal, real, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice));
DEBUG_HANDLE_ERROR(cudaMemcpy( mdlImag, imag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice));
#else
Expand All @@ -171,7 +171,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)

}

#ifndef CUDA
#ifndef _CUDA_ENABLED
void AccProjector::initMdl(std::complex<XFLOAT> *data)
{
mdlComplex = data; // No copy needed - everyone shares the complex reference arrays
Expand Down Expand Up @@ -211,7 +211,7 @@ void AccProjector::clear()
padding_factor = 0;
allocaton_size = 0;

#ifdef CUDA
#ifdef _CUDA_ENABLED
if (mdlReal != 0)
{
#ifndef PROJECTOR_NO_TEXTURES
Expand Down
Loading

0 comments on commit f453d2c

Please sign in to comment.