Skip to content

Commit

Permalink
Attempt to fix for clang 15 (#93)
Browse files Browse the repository at this point in the history
* Update Xoshiro return type

* Prints to help debug in makefile

* Try and resolve hdf5 version issue

* Pin h5py version in tests

* Pin exact versions

* Revert the hdf5 installation

* Revert original change to check for segfault

* Move the testing to gha

* Try to get segfault backtrace

* Change library location

* Attempt to add ssh to gha

* Try ssh again

* Fix typo in yaml

* Change install command

* Use single threaded tests

* Revert changes in the wrapper

* Add tests back to the azure version

* Try adding libgomp via mamba, single multicore test

* Another interactive debugging run

* Forgot to remove delete line

* Check for keyboard interrupt only on main thread

* Add test for openmp dists; try to fix dist segfault

* Try restricting signal check to thread 0
  • Loading branch information
johnlees authored Dec 19, 2023
1 parent 06cc248 commit 7ee661b
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 35 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/azure_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Python package
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/python

name: Run tests

# Run the test suite on every push to any branch.
on: [push]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.8]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      # Capture today's date as a step output for use in the cache keys below.
      - name: Get current date
        id: date
        run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
      - name: Install Conda environment from environment.yml
        uses: mamba-org/setup-micromamba@v1
        with:
          micromamba-version: '1.4.6-0'
          environment-file: environment.yml
          # Date-stamped keys mean the cached environment/downloads only
          # persist on the same day.
          cache-environment-key: environment-${{ steps.date.outputs.date }}
          cache-downloads-key: downloads-${{ steps.date.outputs.date }}
      # Build/install the package into the micromamba env, then run the tests.
      - name: Install and run_test.py
        shell: bash -l {0}
        run: |
          python -m pip install --no-deps --ignore-installed . -vvv
          cd test && python run_test.py
      ## For debugging
      # On failure, rebuild with debug symbols so a backtrace is usable.
      - name: Install debug version
        if: failure()
        shell: bash -l {0}
        run: |
          python setup.py build_ext --debug install
      # On failure, open a detached tmate session (restricted to the actor)
      # for interactive debugging of the runner.
      - name: Setup tmate session for interactive debugging
        if: failure()
        uses: mxschmitt/action-tmate@v3
        with:
          limit-access-to-actor: true
          detached: true
13 changes: 7 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,13 @@ if(DEFINED ENV{CONDA_PREFIX})
include_directories($ENV{CONDA_PREFIX}/include)
link_directories($ENV{CONDA_PREFIX}/lib)
link_directories($ENV{CONDA_PREFIX}/lib/intel64)
else()
set(HDF5_FIND_DEBUG TRUE)
find_package(HDF5 REQUIRED COMPONENTS CXX)
include_directories(${HDF5_INCLUDE_DIRS})
endif()

# Add libraries

find_package(HDF5 REQUIRED COMPONENTS CXX)
include_directories(${HDF5_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/vendor/highfive/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

Expand Down Expand Up @@ -148,11 +149,11 @@ if(CMAKE_CUDA_COMPILER)
#set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_ARCHITECTURES OFF)
endif()
target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen
${HDF5_LIBRARIES} ${BLAS_LIBRARIES} gfortran m dl)
${BLAS_LIBRARIES} gfortran m dl)
if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR ENV{SKETCHLIB_INSTALL} EQUAL "conda"))
target_link_libraries("${TARGET_NAME}" PRIVATE gomp z)
target_link_libraries("${TARGET_NAME}" PRIVATE hdf5_cpp hdf5 gomp z)
else()
target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB)
target_link_libraries("${TARGET_NAME}" PRIVATE ${HDF5_LIBRARIES} ZLIB::ZLIB)
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
Expand Down
5 changes: 1 addition & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# Python package
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/python
# This just checks the package can be installed using CUDA, no testing

trigger:
- master
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- highfive
- hdf5
- h5py
- nlohmann_json
- libgomp
- openblas
- libgfortran-ng
- nvcc_linux-64
Expand Down
2 changes: 1 addition & 1 deletion pp_sketch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

'''PopPUNK sketching functions'''

__version__ = '2.1.1'
__version__ = '2.1.2'
4 changes: 4 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ install_python: python
install $(PYTHON_LIB) $(PYTHON_LIB_PATH)

gpu/dist.cu.o:
echo ${CUDAFLAGS}
echo ${CPPFLAGS}
echo ${CXXFLAGS}
echo ${CFLAGS}
nvcc $(CUDAFLAGS) $(CPPFLAGS) -DGPU_AVAILABLE -x cu -c gpu/dist.cu -o $@

gpu/sketch.cu.o:
Expand Down
28 changes: 16 additions & 12 deletions src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,7 @@ std::vector<Reference> create_sketches(
std::vector<std::runtime_error> errors;
#pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads)
for (unsigned int i = 0; i < names.size(); i++) {
if (interrupt || PyErr_CheckSignals() != 0) {
interrupt = true;
} else {
if (!interrupt) {
try {
SeqBuf seq_in(files[i], kmer_lengths.back());
sketches[i] = Reference(names[i], seq_in, kmer_seeds, sketchsize64,
Expand All @@ -101,6 +99,9 @@ std::vector<Reference> create_sketches(

if (omp_get_thread_num() == 0) {
sketch_progress.tick_count(done_count);
if (PyErr_CheckSignals() != 0) {
interrupt = true;
}
}
}
sketch_progress.finalise();
Expand Down Expand Up @@ -198,9 +199,7 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
// Iterate upper triangle
#pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads) shared(progress)
for (size_t i = 0; i < ref_sketches.size(); i++) {
if (interrupt || PyErr_CheckSignals() != 0) {
interrupt = true;
} else {
if (!interrupt) {
for (size_t j = i + 1; j < ref_sketches.size(); j++) {
size_t pos = square_to_condensed(i, j, ref_sketches.size());
if (jaccard) {
Expand All @@ -219,6 +218,9 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
{
progress += MAX(1, n_progress_ticks / dist_rows);
dist_progress.tick_count(progress);
if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
interrupt = true;
}
}
}
}
Expand All @@ -244,9 +246,7 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
#pragma omp parallel for collapse(2) schedule(static) num_threads(num_threads)
for (unsigned int q_idx = 0; q_idx < query_sketches.size(); q_idx++) {
for (unsigned int r_idx = 0; r_idx < ref_sketches.size(); r_idx++) {
if (interrupt || PyErr_CheckSignals() != 0) {
interrupt = true;
} else {
if (!interrupt) {
const long dist_row = q_idx * ref_sketches.size() + r_idx;
if (jaccard) {
for (unsigned int kmer_idx = 0; kmer_idx < kmer_lengths.size();
Expand All @@ -270,6 +270,9 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
{
progress += MAX(1, n_progress_ticks / dist_rows);
dist_progress.tick_count(progress);
if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
interrupt = true;
}
}
}
}
Expand Down Expand Up @@ -342,9 +345,7 @@ sparse_coo query_db_sparse(std::vector<Reference> &ref_sketches,
#pragma omp parallel for schedule(static) num_threads(num_threads) shared(progress)
for (size_t i = 0; i < ref_sketches.size(); i++) {
std::vector<float> row_dists(ref_sketches.size());
if (interrupt || PyErr_CheckSignals() != 0) {
interrupt = true;
} else {
if (!interrupt) {
for (size_t j = 0; j < ref_sketches.size(); j++) {
if (i != j) {
if (jaccard) {
Expand All @@ -370,6 +371,9 @@ sparse_coo query_db_sparse(std::vector<Reference> &ref_sketches,
{
progress += MAX(1, n_progress_ticks / dist_rows);
dist_progress.tick_count(progress);
if (omp_get_thread_num() == 0 && PyErr_CheckSignals() != 0) {
interrupt = true;
}
}
}
long offset = i * kNN;
Expand Down
2 changes: 1 addition & 1 deletion src/random/rng.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Xoshiro
{
public:
// Definitions to be used as URNG in C++11
typedef size_t result_type;
typedef uint64_t result_type;
static constexpr size_t min() { return std::numeric_limits<uint64_t>::min(); }
static constexpr size_t max() { return std::numeric_limits<uint64_t>::max(); }
uint64_t operator()(); // generate random number U(min, max)
Expand Down
23 changes: 13 additions & 10 deletions test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@

# create sketches
sys.stderr.write("Sketch smoke test\n")
subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 1", shell=True, check=True)
os.remove("test_db.h5")
subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db -s 10000 -k 15,29,4 --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l references.txt -o test_db_phased --codon-phased --cpus 1", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch 12673_8#24.contigs_velvet.fa 12673_8#34.contigs_velvet.fa -o test_db_small -s 1000 --kmer 14", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py add random test_db --cpus 2", shell=True, check=True)
# calculate distances
sys.stderr.write("Distance integration test\n")
subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 2", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 2", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 1", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query dist test_db --cpus 2", shell=True, check=True) # checks if can be run w/ openmp
subprocess.run("python ../sketchlib-runner.py query dist test_db -o ppsketch --cpus 1", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query jaccard test_db_small --cpus 1", shell=True, check=True) # checks if can be run
subprocess.run("python test-dists.py --ref-db test_db --results ppsketch_ref", shell=True, check=True) # checks results match
subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 2", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query dist test_db_phased --cpus 1", shell=True, check=True) # checks if can be run
subprocess.run("python test-dists.py --ref-db test_db_phased --results ppsketch_ref_phased", shell=True, check=True) # checks results match

sys.stderr.write("Sparse distance smoke test\n")
Expand All @@ -46,19 +49,19 @@
subprocess.run("python ../sketchlib-runner.py query sparse jaccard test_db --kNN 2 --kmer 19", shell=True, check=True) # checks if can be run

sys.stderr.write("Ref v query distance smoke test\n")
subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l rlist.txt -o r_db --cpus 1", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l qlist.txt -o q_db --cpus 1", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py query dist r_db q_db.h5", shell=True, check=True) # checks if can be run
subprocess.run("python ../sketchlib-runner.py query jaccard r_db q_db", shell=True, check=True) # checks if can be run

# Joining
sys.stderr.write("Join smoke test\n")
subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l db1_refs.txt -o db1 --cpus 1", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py sketch -l db2_refs.txt -o db2 --cpus 1", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py join db1.h5 db2.h5 -o joined", shell=True, check=True)
# Random
sys.stderr.write("Random test\n")
subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 2", shell=True, check=True)
subprocess.run("python ../sketchlib-runner.py remove random test_db --cpus 1", shell=True, check=True)
# Matrix
sys.stderr.write("Matrix integration test\n")
subprocess.run("python test-matrix.py", shell=True, check=True)
Expand Down

0 comments on commit 7ee661b

Please sign in to comment.