LibTorch support is now added to the build system.

During configuration, provide -DTORCH=ON for the libraries to be downloaded and configured automatically. This does not use ExternalProject (as FFTW, FLTK and TBB do): because LibTorch itself also needs to be configured, its libraries are required at configure time. To support old CMake versions > 3.11, I've implemented the fetch and configuration myself using calls to Linux commands, so it is not cross-platform. Additionally, all instances of the build-time definition CUDA had to be replaced by _CUDA_ENABLED to avoid conflicts with LibTorch. This was originally a poor choice (made by those damn PhD students) and had to be fixed at some point anyway.
3dem · Jun 10, 2020 · f453d2c · f453d2c
1 parent d470c8e
commit f453d2c
Show file tree

Hide file tree

Showing 29 changed files with 235 additions and 195 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -156,7 +156,7 @@ if(CUDA_FOUND)
     endif()
 
     if(CUDA)
-        add_definitions(-DCUDA)
+        add_definitions(-D_CUDA_ENABLED)
     endif()
 else(CUDA_FOUND)
     message(STATUS "Using non-cuda compilation....")
@@ -355,6 +355,16 @@ if(PNG_FOUND)
 	add_definitions(-DHAVE_PNG)
 endif()
 
+
+# -----------------------------------------------------------------------------Torch--
+
+option(TORCH "Enable support for Torch" OFF)
+if(TORCH)
+    message(STATUS "Torch support requested by user.")
+    include(${CMAKE_SOURCE_DIR}/cmake/BuildTorch.cmake)
+    add_definitions(-D_TORCH_ENABLED)
+endif(TORCH)
+
 # ----------------------------------------------------------------------COPY SCRIPTS--
 
 if(FORCE_OWN_FFTW)

diff --git a/src/acc/acc_backprojector.h b/src/acc/acc_backprojector.h
@@ -1,14 +1,14 @@
 #ifndef ACC_BACKPROJECTOR_H_
 #define ACC_BACKPROJECTOR_H_
 
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 #  include <cuda_runtime.h>
 #endif
 #include "src/complex.h"
 #include "src/acc/settings.h"
 #include "src/acc/acc_ptr.h"
 
-#ifndef CUDA
+#ifndef _CUDA_ENABLED
 #  include <tbb/spin_mutex.h>
 #endif
 
@@ -22,8 +22,8 @@ class AccBackprojector
 	XFLOAT padding_factor;
 	size_t mdlXYZ;
 
-#ifndef CUDA
-	tbb::spin_mutex *mutexes;
+#ifndef _CUDA_ENABLED
+tbb::spin_mutex *mutexes;
 #endif
 
 	size_t allocaton_size;
@@ -43,8 +43,8 @@ class AccBackprojector
 				allocaton_size(0), voxelCount(0),
 				d_mdlReal(NULL), d_mdlImag(NULL), d_mdlWeight(NULL),
 				stream(0)
-#ifndef CUDA
-				, mutexes(0)
+#ifndef _CUDA_ENABLED
+, mutexes(0)
 #endif
 	{}
 

diff --git a/src/acc/acc_backprojector_impl.h b/src/acc/acc_backprojector_impl.h
@@ -32,7 +32,7 @@ size_t AccBackprojector::setMdlDim(
 		padding_factor = paddingFactor;
 
 		//Allocate space for model
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		HANDLE_ERROR(cudaMalloc( (void**) &d_mdlReal,   mdlXYZ * sizeof(XFLOAT)));
 		HANDLE_ERROR(cudaMalloc( (void**) &d_mdlImag,   mdlXYZ * sizeof(XFLOAT)));
 		HANDLE_ERROR(cudaMalloc( (void**) &d_mdlWeight, mdlXYZ * sizeof(XFLOAT)));
@@ -67,7 +67,7 @@ void AccBackprojector::initMdl()
 #endif
 
 	//Initiate model with zeros
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	DEBUG_HANDLE_ERROR(cudaMemset( d_mdlReal,   0, mdlXYZ * sizeof(XFLOAT)));
 	DEBUG_HANDLE_ERROR(cudaMemset( d_mdlImag,   0, mdlXYZ * sizeof(XFLOAT)));
 	DEBUG_HANDLE_ERROR(cudaMemset( d_mdlWeight, 0, mdlXYZ * sizeof(XFLOAT)));
@@ -83,7 +83,7 @@ void AccBackprojector::initMdl()
 
 void AccBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w)
 {
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Make sure to wait for remaining kernel executions
 
 	DEBUG_HANDLE_ERROR(cudaMemcpyAsync( r, d_mdlReal,   mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream));
@@ -100,7 +100,7 @@ void AccBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w)
 
 void AccBackprojector::getMdlDataPtrs(XFLOAT *& r, XFLOAT *& i, XFLOAT *& w)
 {
-#ifndef CUDA
+#ifndef _CUDA_ENABLED
 	r = d_mdlReal;
 	i = d_mdlImag;
 	w = d_mdlWeight;
@@ -122,7 +122,7 @@ void AccBackprojector::clear()
 
 	if (d_mdlReal != NULL)
 	{
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		DEBUG_HANDLE_ERROR(cudaFree(d_mdlReal));
 		DEBUG_HANDLE_ERROR(cudaFree(d_mdlImag));
 		DEBUG_HANDLE_ERROR(cudaFree(d_mdlWeight));

diff --git a/src/acc/acc_helper_functions.h b/src/acc/acc_helper_functions.h
@@ -148,7 +148,7 @@ void mapAllWeightsToMweights(
 template< typename T>
 void arrayOverThreshold(AccPtr<T> &data, AccPtr<bool> &passed, T threshold)
 {
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	int grid_size = ceil((float)data.getSize()/(float)OVER_THRESHOLD_BLOCK_SIZE);
 	cuda_kernel_array_over_threshold<T><<< grid_size, OVER_THRESHOLD_BLOCK_SIZE, 0, data.getStream() >>>(
 			~data,
@@ -180,7 +180,7 @@ size_t findThresholdIdxInCumulativeSum(AccPtr<T> &data, T threshold)
 	}
 	else
 	{
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		AccPtr<size_t >  idx(1, data.getStream(), data.getAllocator());
 		idx[0] = 0;
 
@@ -350,7 +350,7 @@ void runCenterFFT(MultidimArray< T >& v, bool forward, CudaCustomAllocator *allo
 
 		int dim=ceilf((float)(v.nzyxdim/(float)(2*CFTT_BLOCK_SIZE)));
 		AccUtilities::centerFFT_2D(dim, 0, CFTT_BLOCK_SIZE,
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 				~img_in,
 #else
 				&img_in[0],

diff --git a/src/acc/acc_helper_functions_impl.h b/src/acc/acc_helper_functions_impl.h
@@ -639,7 +639,7 @@ void runBackProjectKernel(
 
 	if(BP.mdlZ==1)
 	{
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		if(ctf_premultiplied)
 			cuda_kernel_backproject2D<true><<<imageCount,BP_2D_BLOCK_SIZE,0,optStream>>>(
 				d_img_real, d_img_imag,
@@ -689,7 +689,7 @@ void runBackProjectKernel(
 		if(do_sgd)
 		{
 			if(data_is_3D)
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 				if(ctf_premultiplied)
 					cuda_kernel_backprojectSGD<true, true><<<imageCount,BP_DATA3D_BLOCK_SIZE,0,optStream>>>(
 						projector, d_img_real, d_img_imag,
@@ -734,7 +734,7 @@ void runBackProjectKernel(
 
 #endif
 			else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 				if(ctf_premultiplied)
 					cuda_kernel_backprojectSGD<false, true><<<imageCount,BP_REF3D_BLOCK_SIZE,0,optStream>>>(
 						projector, d_img_real, d_img_imag,
@@ -782,7 +782,7 @@ void runBackProjectKernel(
 		else
 		{
 			if(data_is_3D)
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 				if(ctf_premultiplied)
 					cuda_kernel_backproject3D<true, true><<<imageCount,BP_DATA3D_BLOCK_SIZE,0,optStream>>>(
 						d_img_real, d_img_imag,
@@ -828,7 +828,7 @@ void runBackProjectKernel(
 
 #endif
 			else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 			    if(ctf_premultiplied)
 					cuda_kernel_backproject3D<false, true><<<imageCount,BP_REF3D_BLOCK_SIZE,0,optStream>>>(
 						d_img_real, d_img_imag,
@@ -914,7 +914,7 @@ void mapAllWeightsToMweights(
 {
 	size_t combinations = orientation_num*translation_num;
 	int grid_size = ceil((float)(combinations)/(float)WEIGHT_MAP_BLOCK_SIZE);
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	cuda_kernel_allweights_to_mweights<<< grid_size, WEIGHT_MAP_BLOCK_SIZE, 0, stream >>>(
 			d_iorient,
 			d_allweights,
@@ -1630,7 +1630,7 @@ void runCollect2jobs(	int grid_dim,
 						)
 {
 	if (data_is_3D) {
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	dim3 numblocks(grid_dim);
 	size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*5; // x+y+z+myp+weights
 	cuda_kernel_collect2jobs<true><<<numblocks,SUMW_BLOCK_SIZE,shared_buffer>>>(
@@ -1682,7 +1682,7 @@ void runCollect2jobs(	int grid_dim,
 	}
 	else
 	{
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	dim3 numblocks(grid_dim);
 	size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*4; // x+y+myp+weights
 	cuda_kernel_collect2jobs<false><<<numblocks,SUMW_BLOCK_SIZE,shared_buffer>>>(
@@ -1804,7 +1804,7 @@ void windowFourierTransform2(
 	if(oX==iX)
 	{
 		HANDLE_ERROR(cudaStreamSynchronize(d_in.getStream()));
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		cudaCpyDeviceToDevice(&d_in(pos), ~d_out, oX*oY*oZ*Npsi, d_out.getStream() );
 #else
 		memcpy(&d_out[0], &d_in[0], oX*oY*oZ*Npsi*sizeof(ACCCOMPLEX));
@@ -1816,7 +1816,7 @@ void windowFourierTransform2(
 	{
 		long int max_r2 = (iX - 1) * (iX - 1);
 
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		dim3 grid_dim(ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi);
 		cuda_kernel_window_fourier_transform<true><<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>(
 				&d_in(pos),
@@ -1843,7 +1843,7 @@ void windowFourierTransform2(
 	}
 	else
 	{
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		dim3 grid_dim(ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi);
 		cuda_kernel_window_fourier_transform<false><<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>(
 				&d_in(pos),

diff --git a/src/acc/acc_ml_optimiser_impl.h b/src/acc/acc_ml_optimiser_impl.h
@@ -1672,7 +1672,7 @@ void convertAllSquaredDifferencesToWeights(unsigned exp_ipass,
 			DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread));
 
 			//Set all device-located weights to zero, and only the smallest one to 1.
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 			DEBUG_HANDLE_ERROR(cudaMemsetAsync(~(PassWeights[img_id].weights), 0.f, PassWeights[img_id].weights.getSize()*sizeof(XFLOAT),0));
 
 			XFLOAT unity=1;

diff --git a/src/acc/acc_projector.h b/src/acc/acc_projector.h
@@ -7,7 +7,7 @@
 #include "src/acc/acc_ptr.h"
 //#include <cuda_runtime.h>
 //#include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh"
-#ifndef CUDA
+#ifndef _CUDA_ENABLED
 #include <complex>
 #endif
 
@@ -30,7 +30,7 @@ class AccProjector
 
 	size_t pitch2D;
 #else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	XFLOAT *mdlReal, *mdlImag;
 #else
 	std::complex<XFLOAT> *mdlComplex;
@@ -56,7 +56,7 @@ class AccProjector
 		mdlImag = 0;
 		pitch2D = 0;
 #else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 		mdlReal = 0;
 		mdlImag = 0;
 #else
@@ -73,8 +73,8 @@ class AccProjector
 
 	void initMdl(XFLOAT *real, XFLOAT *imag);
 	void initMdl(Complex *data);
-#ifndef CUDA
-	void initMdl(std::complex<XFLOAT> *data);
+#ifndef _CUDA_ENABLED
+void initMdl(std::complex<XFLOAT> *data);
 #endif
 
 	void clear();

diff --git a/src/acc/acc_projector_impl.h b/src/acc/acc_projector_impl.h
@@ -102,7 +102,7 @@ bool AccProjector::setMdlDim(
 	HANDLE_ERROR(cudaCreateTextureObject(mdlImag, &resDesc_imag, &texDesc, NULL));
 
 #else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlReal, mdlXYZ * sizeof(XFLOAT)));
 	DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlImag, mdlXYZ * sizeof(XFLOAT)));
 #else
@@ -120,7 +120,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
         printf("DEBUG_ERROR: Model dimensions must be set with setMdlDim before call to setMdlData.");
 		CRITICAL(ERR_MDLDIM);
 	}
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	if (mdlReal == NULL)
 	{
         printf("DEBUG_ERROR: initMdl called before call to setMdlData.");
@@ -157,7 +157,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
 		DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayImag2D, pitch2D, imag, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, cudaMemcpyHostToDevice));
 	}
 #else
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	DEBUG_HANDLE_ERROR(cudaMemcpy( mdlReal, real, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice));
 	DEBUG_HANDLE_ERROR(cudaMemcpy( mdlImag, imag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice));
 #else
@@ -171,7 +171,7 @@ void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag)
 
 }
 
-#ifndef CUDA
+#ifndef _CUDA_ENABLED
 void AccProjector::initMdl(std::complex<XFLOAT> *data)
 {
 	mdlComplex = data;  // No copy needed - everyone shares the complex reference arrays
@@ -211,7 +211,7 @@ void AccProjector::clear()
 	padding_factor = 0;
 	allocaton_size = 0;
 
-#ifdef CUDA
+#ifdef _CUDA_ENABLED
 	if (mdlReal != 0)
 	{
 #ifndef PROJECTOR_NO_TEXTURES