Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility issues between Eigen and GPUs (OpenACC/CUDA) #728

Merged
merged 8 commits into from
Sep 6, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
[submodule "ext/cli11"]
path = ext/cli11
url = https://github.com/CLIUtils/CLI11.git
[submodule "ext/eigen"]
path = ext/eigen
url = https://github.com/eigenteam/eigen-git-mirror.git
[submodule "ext/spdlog"]
path = ext/spdlog
url = https://github.com/BlueBrain/spdlog.git
[submodule "ext/fmt"]
path = ext/fmt
url = https://github.com/fmtlib/fmt.git
[submodule "ext/eigen"]
path = ext/eigen
url = https://github.com/BlueBrain/eigen.git
2 changes: 1 addition & 1 deletion ext/eigen
Submodule eigen updated from b13875 to e5a8b0
24 changes: 24 additions & 0 deletions src/codegen/codegen_acc_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

#include "codegen/codegen_acc_visitor.hpp"

#include "ast/eigen_linear_solver_block.hpp"
#include "ast/integer.hpp"


using namespace fmt::literals;

Expand Down Expand Up @@ -73,6 +76,15 @@ void CodegenAccVisitor::print_backend_includes() {
printer->add_line("#include <cuda_runtime_api.h>");
printer->add_line("#include <openacc.h>");
}

if (info.eigen_linear_solver_exist && std::accumulate(info.state_vars.begin(),
info.state_vars.end(),
0,
[](int l, const SymbolType& variable) {
return l += variable->get_length();
}) > 4) {
printer->add_line("#include <partial_piv_lu/partial_piv_lu.h>");
}
}


Expand Down Expand Up @@ -124,6 +136,18 @@ void CodegenAccVisitor::print_net_send_buffering_grow() {
// can not grow buffer during gpu execution
}

void CodegenAccVisitor::print_eigen_linear_solver(const std::string& float_type,
int N,
const std::string& Xm,
kotsaloscv marked this conversation as resolved.
Show resolved Hide resolved
const std::string& Jm,
const std::string& Fm) {
if (N <= 4) {
printer->add_line("{0} = {1}.inverse()*{2};"_format(Xm, Jm, Fm));
} else {
printer->add_line("{0} = partialPivLu<{1}>({2}, {3});"_format(Xm, N, Jm, Fm));
}
}

/**
* Each kernel like nrn_init, nrn_state and nrn_cur could be offloaded
* to accelerator. In this case, at very top level, we print pragma
Expand Down
7 changes: 7 additions & 0 deletions src/codegen/codegen_acc_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ class CodegenAccVisitor: public CodegenCVisitor {
void print_net_send_buffering_grow() override;


void print_eigen_linear_solver(const std::string& float_type,
int N,
const std::string& X,
const std::string& Jm,
const std::string& F) override;


public:
CodegenAccVisitor(const std::string& mod_file,
const std::string& output_dir,
Expand Down
68 changes: 49 additions & 19 deletions src/codegen/codegen_c_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,21 +531,27 @@ std::string CodegenCVisitor::breakpoint_current(std::string current) const {


int CodegenCVisitor::float_variables_size() const {
auto count_length = [](const std::vector<SymbolType>& variables) -> int {
int length = 0;
for (const auto& variable: variables) {
length += variable->get_length();
}
return length;
auto count_length = [](int l, const SymbolType& variable) {
return l += variable->get_length();
};

int float_size = count_length(info.range_parameter_vars);
float_size += count_length(info.range_assigned_vars);
float_size += count_length(info.range_state_vars);
float_size += count_length(info.assigned_vars);
int float_size = std::accumulate(info.range_parameter_vars.begin(),
info.range_parameter_vars.end(),
0,
count_length);
float_size += std::accumulate(info.range_assigned_vars.begin(),
info.range_assigned_vars.end(),
0,
count_length);
float_size += std::accumulate(info.range_state_vars.begin(),
info.range_state_vars.end(),
0,
count_length);
float_size +=
std::accumulate(info.assigned_vars.begin(), info.assigned_vars.end(), 0, count_length);

/// all state variables for which we add Dstate variables
float_size += count_length(info.state_vars);
float_size += std::accumulate(info.state_vars.begin(), info.state_vars.end(), 0, count_length);

/// for v_unused variable
if (info.vectorize) {
Expand Down Expand Up @@ -1779,13 +1785,16 @@ void CodegenCVisitor::visit_eigen_newton_solver_block(const ast::EigenNewtonSolv
// try to use a different string for the matrices created by sympy in the form
// X_<random_number>, J_<random_number>, Jm_<random_number> and F_<random_number>
std::string X = find_var_unique_name("X");
std::string Xm = find_var_unique_name("Xm");
std::string J = find_var_unique_name("J");
std::string Jm = find_var_unique_name("Jm");
std::string F = find_var_unique_name("F");
std::string Fm = find_var_unique_name("Fm");

auto float_type = default_float_data_type();
int N = node.get_n_state_vars()->get_value();
printer->add_line("Eigen::Matrix<{}, {}, 1> {};"_format(float_type, N, X));
printer->add_line("Eigen::Matrix<{}, {}, 1> {};"_format(float_type, N, Xm));
printer->add_line("{}* {} = {}.data();"_format(float_type, X, Xm));
kotsaloscv marked this conversation as resolved.
Show resolved Hide resolved

print_statement_block(*node.get_setup_x_block(), false, false);

Expand Down Expand Up @@ -1823,12 +1832,14 @@ void CodegenCVisitor::visit_eigen_newton_solver_block(const ast::EigenNewtonSolv
"Eigen::Matrix<{0}, {1}, {1}>& {4}) {5}"_format(
float_type,
N,
X,
F,
Xm,
Fm,
Jm,
is_functor_const(variable_block, functor_block) ? "const " : ""));
printer->start_block();
printer->add_line("const {}* {} = {}.data();"_format(float_type, X, Xm));
printer->add_line("{}* {} = {}.data();"_format(float_type, J, Jm));
printer->add_line("{}* {} = {}.data();"_format(float_type, F, Fm));
print_statement_block(functor_block, false, false);
printer->end_block(2);

Expand All @@ -1846,7 +1857,7 @@ void CodegenCVisitor::visit_eigen_newton_solver_block(const ast::EigenNewtonSolv
printer->add_line("functor newton_functor(nt, inst, id, pnodecount, v, indexes);");
printer->add_line("newton_functor.initialize();");
printer->add_line(
"int newton_iterations = nmodl::newton::newton_solver({}, newton_functor);"_format(X));
"int newton_iterations = nmodl::newton::newton_solver({}, newton_functor);"_format(Xm));

// assign newton solver results in matrix X to state vars
print_statement_block(*node.get_update_states_block(), false, false);
Expand All @@ -1860,27 +1871,46 @@ void CodegenCVisitor::visit_eigen_linear_solver_block(const ast::EigenLinearSolv
// try to use a different string for the matrices created by sympy in the form
// X_<random_number>, J_<random_number>, Jm_<random_number> and F_<random_number>
std::string X = find_var_unique_name("X");
std::string Xm = find_var_unique_name("Xm");
std::string J = find_var_unique_name("J");
std::string Jm = find_var_unique_name("Jm");
std::string F = find_var_unique_name("F");
std::string Fm = find_var_unique_name("Fm");

const std::string float_type = default_float_data_type();
int N = node.get_n_state_vars()->get_value();
printer->add_line("Eigen::Matrix<{0}, {1}, 1> {2}, {3};"_format(float_type, N, X, F));
printer->add_line("Eigen::Matrix<{0}, {1}, 1> {2}, {3};"_format(float_type, N, Xm, Fm));
printer->add_line("Eigen::Matrix<{0}, {1}, {1}> {2};"_format(float_type, N, Jm));
printer->add_line("{}* {} = {}.data();"_format(float_type, X, Xm));
printer->add_line("{}* {} = {}.data();"_format(float_type, J, Jm));
printer->add_line("{}* {} = {}.data();"_format(float_type, F, Fm));
print_statement_block(*node.get_variable_block(), false, false);
print_statement_block(*node.get_initialize_block(), false, false);
print_statement_block(*node.get_setup_x_block(), false, false);

printer->add_newline();
printer->add_line(
"{0} = Eigen::PartialPivLU<Eigen::Ref<Eigen::Matrix<{1}, {2}, {2}>>>({3}).solve({4});"_format(
X, float_type, N, Jm, F));
print_eigen_linear_solver(float_type, N, Xm, Jm, Fm);
printer->add_newline();

print_statement_block(*node.get_update_states_block(), false, false);
print_statement_block(*node.get_finalize_block(), false, false);
}

void CodegenCVisitor::print_eigen_linear_solver(const std::string& float_type,
int N,
const std::string& Xm,
const std::string& Jm,
const std::string& Fm) {
if (N <= 4) {
// Faster compared to LU, given the template specialization in Eigen.
printer->add_line("{0} = {1}.inverse()*{2};"_format(Xm, Jm, Fm));
} else {
printer->add_line(
"{0} = Eigen::PartialPivLU<Eigen::Ref<Eigen::Matrix<{1}, {2}, {2}>>>({3}).solve({4});"_format(
Xm, float_type, N, Jm, Fm));
}
}

/****************************************************************************************/
/* Code-specific helper routines */
/****************************************************************************************/
Expand Down
6 changes: 6 additions & 0 deletions src/codegen/codegen_c_visitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <algorithm>
#include <cmath>
#include <ctime>
#include <numeric>
#include <ostream>
#include <string>
#include <utility>
Expand Down Expand Up @@ -1898,6 +1899,11 @@ class CodegenCVisitor: public visitor::ConstAstVisitor {
void visit_function_call(const ast::FunctionCall& node) override;
void visit_eigen_newton_solver_block(const ast::EigenNewtonSolverBlock& node) override;
void visit_eigen_linear_solver_block(const ast::EigenLinearSolverBlock& node) override;
virtual void print_eigen_linear_solver(const std::string& float_type,
int N,
const std::string& X,
kotsaloscv marked this conversation as resolved.
Show resolved Hide resolved
const std::string& Jm,
const std::string& F);
void visit_if_statement(const ast::IfStatement& node) override;
void visit_indexed_name(const ast::IndexedName& node) override;
void visit_integer(const ast::Integer& node) override;
Expand Down
13 changes: 9 additions & 4 deletions src/solver/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# =============================================================================
# Solver sources
# =============================================================================
set(SOLVER_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/newton/newton.hpp)
set(NEWTON_SOLVER_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/newton/newton.hpp)

# =============================================================================
# Copy necessary headers to build directory
# Copy necessary files to build directory
# =============================================================================
file(GLOB NMODL_SOLVER_HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/newton/*.h*")
file(COPY ${NMODL_SOLVER_HEADER_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/newton/)
# Newton
file(GLOB NMODL_NEWTON_SOLVER_HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/newton/*.h*")
file(COPY ${NMODL_NEWTON_SOLVER_HEADER_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/newton/)
# partial_piv_lu
file(GLOB NMODL_PARTIAL_PIV_LU_API_FILES "${CMAKE_CURRENT_SOURCE_DIR}/partial_piv_lu/*")
file(COPY ${NMODL_PARTIAL_PIV_LU_API_FILES} DESTINATION ${CMAKE_BINARY_DIR}/include/partial_piv_lu/)
# Eigen
file(COPY ${NMODL_PROJECT_SOURCE_DIR}/ext/eigen/Eigen DESTINATION ${CMAKE_BINARY_DIR}/include/)

# =============================================================================
Expand Down
17 changes: 15 additions & 2 deletions src/solver/newton/newton.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
* \brief Implementation of Newton method for solving system of non-linear equations
*/

#if defined(_OPENACC) && !defined(DISABLE_OPENACC)
pramodk marked this conversation as resolved.
Show resolved Hide resolved
#include "partial_piv_lu/partial_piv_lu.h"
#endif

#include <Eigen/LU>

namespace nmodl {
Expand Down Expand Up @@ -69,9 +73,13 @@ EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix<double, N, 1>& X,
// we have converged: return iteration count
return iter;
}
#if defined(_OPENACC) && !defined(DISABLE_OPENACC)
pramodk marked this conversation as resolved.
Show resolved Hide resolved
X -= partialPivLu<N>(J, F);
#else
// update X use in-place LU decomposition of J with partial pivoting
// (suitable for any N, but less efficient than .inverse() for N <=4)
X -= Eigen::PartialPivLU<Eigen::Ref<Eigen::Matrix<double, N, N>>>(J).solve(F);
#endif
}
// If we fail to converge after max_iter iterations, return -1
return -1;
Expand Down Expand Up @@ -142,10 +150,13 @@ EIGEN_DEVICE_FUNC int newton_numerical_diff_solver(Eigen::Matrix<double, N, 1>&
// restore X
X[i] += dX;
}
// update X
// use in-place LU decomposition of J with partial pivoting
#if defined(_OPENACC) && !defined(DISABLE_OPENACC)
X -= partialPivLu<N>(J, F);
#else
pramodk marked this conversation as resolved.
Show resolved Hide resolved
// update X use in-place LU decomposition of J with partial pivoting
// (suitable for any N, but less efficient than .inverse() for N <=4)
X -= Eigen::PartialPivLU<Eigen::Ref<Eigen::Matrix<double, N, N>>>(J).solve(F);
#endif
}
// If we fail to converge after max_iter iterations, return -1
return -1;
Expand All @@ -171,6 +182,8 @@ EIGEN_DEVICE_FUNC int newton_solver_small_N(Eigen::Matrix<double, N, 1>& X,
if (error < eps) {
return iter;
}
// The inverse can be called from within OpenACC regions without any issue, as opposed to
// Eigen::PartialPivLU.
X -= J.inverse() * F;
}
return -1;
Expand Down
36 changes: 36 additions & 0 deletions src/solver/partial_piv_lu/partial_piv_lu.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*************************************************************************
* Copyright (C) 2018-2019 Blue Brain Project
*
* This file is part of NMODL distributed under the terms of the GNU
* Lesser General Public License. See top-level LICENSE file for details.
*************************************************************************/

#include "partial_piv_lu/partial_piv_lu.h"

template<int dim>
EIGEN_DEVICE_FUNC
VecType<dim> partialPivLu(const MatType<dim>& A, const VecType<dim>& b)
{
return A.partialPivLu().solve(b);
}

// Explicit Template Instantiation
template EIGEN_DEVICE_FUNC VecType<1> partialPivLu<1>(const MatType<1>& A, const VecType<1>& b);
template EIGEN_DEVICE_FUNC VecType<2> partialPivLu<2>(const MatType<2>& A, const VecType<2>& b);
template EIGEN_DEVICE_FUNC VecType<3> partialPivLu<3>(const MatType<3>& A, const VecType<3>& b);
template EIGEN_DEVICE_FUNC VecType<4> partialPivLu<4>(const MatType<4>& A, const VecType<4>& b);
template EIGEN_DEVICE_FUNC VecType<5> partialPivLu<5>(const MatType<5>& A, const VecType<5>& b);
template EIGEN_DEVICE_FUNC VecType<6> partialPivLu<6>(const MatType<6>& A, const VecType<6>& b);
template EIGEN_DEVICE_FUNC VecType<7> partialPivLu<7>(const MatType<7>& A, const VecType<7>& b);
template EIGEN_DEVICE_FUNC VecType<8> partialPivLu<8>(const MatType<8>& A, const VecType<8>& b);
template EIGEN_DEVICE_FUNC VecType<9> partialPivLu<9>(const MatType<9>& A, const VecType<9>& b);
template EIGEN_DEVICE_FUNC VecType<10> partialPivLu<10>(const MatType<10>& A, const VecType<10>& b);
template EIGEN_DEVICE_FUNC VecType<11> partialPivLu<11>(const MatType<11>& A, const VecType<11>& b);
template EIGEN_DEVICE_FUNC VecType<12> partialPivLu<12>(const MatType<12>& A, const VecType<12>& b);
template EIGEN_DEVICE_FUNC VecType<13> partialPivLu<13>(const MatType<13>& A, const VecType<13>& b);
template EIGEN_DEVICE_FUNC VecType<14> partialPivLu<14>(const MatType<14>& A, const VecType<14>& b);
template EIGEN_DEVICE_FUNC VecType<15> partialPivLu<15>(const MatType<15>& A, const VecType<15>& b);
template EIGEN_DEVICE_FUNC VecType<16> partialPivLu<16>(const MatType<16>& A, const VecType<16>& b);

// Currently there is an issue in Eigen (GPU-branch) for matrices 17x17 and above.
// ToDo: Check in a future release if this issue is resolved!
28 changes: 28 additions & 0 deletions src/solver/partial_piv_lu/partial_piv_lu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*************************************************************************
* Copyright (C) 2018-2019 Blue Brain Project
*
* This file is part of NMODL distributed under the terms of the GNU
* Lesser General Public License. See top-level LICENSE file for details.
*************************************************************************/

#pragma once

#include "Eigen/Dense"
#include "Eigen/LU"

template<int dim>
using MatType = Eigen::Matrix<double, dim, dim, Eigen::ColMajor, dim, dim>;

template<int dim>
using VecType = Eigen::Matrix<double, dim, 1, Eigen::ColMajor, dim, 1>;

// Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly
// from within an OpenACC region. Therefore, we need to wrap them in a special API (decorate
// them with __device__ & acc routine tokens), which allows us to eventually call them from OpenACC.
// Calling these functions from CUDA kernels presents no issue ...

#if defined(_OPENACC) && !defined(DISABLE_OPENACC)
#pragma acc routine seq
#endif
template<int dim>
VecType<dim> partialPivLu(const MatType<dim>&, const VecType<dim>&);
4 changes: 2 additions & 2 deletions test/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ add_executable(
visitor/verbatim.cpp)
add_executable(testprinter printer/printer.cpp)
add_executable(testsymtab symtab/symbol_table.cpp)
add_executable(testnewton newton/newton.cpp ${SOLVER_SOURCE_FILES})
add_executable(testfast_math fast_math/fast_math.cpp ${SOLVER_SOURCE_FILES})
add_executable(testnewton newton/newton.cpp ${NEWTON_SOLVER_SOURCE_FILES})
add_executable(testfast_math fast_math/fast_math.cpp ${NEWTON_SOLVER_SOURCE_FILES})
add_executable(testunitlexer units/lexer.cpp)
add_executable(testunitparser units/parser.cpp)
add_executable(testcodegen codegen/main.cpp codegen/codegen_ispc.cpp codegen/codegen_helper.cpp
Expand Down