Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integration test for post processing of flat flow cell demultiplexed with bclconvert #2236

Merged
merged 13 commits into from
Aug 1, 2023
20 changes: 4 additions & 16 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2754,8 +2754,8 @@ def mock_config(rnafusion_dir: Path, rnafusion_case_id: str) -> None:
)


@pytest.fixture
def expected_total_reads() -> int:
@pytest.fixture(name="expected_total_reads", scope="session")
def fixture_expected_total_reads() -> int:
return 1_000_000


Expand All @@ -2765,8 +2765,8 @@ def fixture_flow_cell_name() -> str:
return "HVKJCDRXX"


@pytest.fixture
def store_with_sequencing_metrics(
@pytest.fixture(name="store_with_sequencing_metrics")
def fixture_store_with_sequencing_metrics(
store: Store, sample_id: str, expected_total_reads: int, flow_cell_name: str
) -> Generator[Store, None, None]:
"""Return a store with multiple samples with sample lane sequencing metrics."""
Expand Down Expand Up @@ -2803,18 +2803,6 @@ def store_with_sequencing_metrics(
yield store


@pytest.fixture
def flow_cell_name_demultiplexed_with_bcl_convert() -> str:
    """Return the name of the flow cell that was demultiplexed with Bcl Convert."""
    flow_cell_name: str = "HY7FFDRX2"
    return flow_cell_name


@pytest.fixture
def flow_cell_directory_name_demultiplexed_with_bcl_convert(
    flow_cell_name_demultiplexed_with_bcl_convert: str,
) -> str:
    """Return the directory name of the flow cell demultiplexed with Bcl Convert.

    The directory name is a fixed run prefix followed by the flow cell name.
    """
    return f"230504_A00689_0804_B{flow_cell_name_demultiplexed_with_bcl_convert}"


@pytest.fixture(name="demultiplexed_flow_cells_tmp_directory")
def fixture_demultiplexed_flow_cells_tmp_directory(tmp_path) -> Path:
original_dir = Path(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Lane,Sample_ID,Sample_Project,index,index2,ReadNumber,AdapterBases,SampleBases,% Adapter Bases
1,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,1,0,415032696,0.000
1,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,2,0,1436651640,0.000
1,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,1,0,576200560,0.000
1,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,2,0,1994540400,0.000
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Lane,SampleID,Sample_Project,Index,# Reads,# Perfect Index Reads,# One Mismatch Index Reads,# Two Mismatch Index Reads,% Reads,% Perfect Index Reads,% One Mismatch Index Reads,% Two Mismatch Index Reads
1,ACC11927A2,405887,GTCCTTCGGC-CTGTGCATGA,15962796,15962796,0,0,0.0437,1.0000,0.0000,0.0000
1,ACC11927A5,405887,GTTTCACGAT-TTTGGCCGAA,22161560,22161560,0,0,0.0607,1.0000,0.0000,0.0000
1,Undetermined,Undetermined,,38680101,38680101,0,0,0.1059,1.0000,0.0000,0.0000
2,ACC11927A2,405887,GTCCTTCGGC-CTGTGCATGA,15855013,15855013,0,0,0.0436,1.0000,0.0000,0.0000
2,ACC11927A5,405887,GTTTCACGAT-TTTGGCCGAA,22146552,22146552,0,0,0.0609,1.0000,0.0000,0.0000
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Lane,SampleID,Sample_Project,index,index2,ReadNumber,Yield,YieldQ30,QualityScoreSum,Mean Quality Score (PF),% Q30
1,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,1,415032696,393745856,15004333259,36.15,0.95
1,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,2,1436651640,1344252463,51555254683,35.89,0.94
1,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,1,576200560,546413091,20826965934,36.15,0.95
1,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,2,1994540400,1899135721,72196352070,36.20,0.95
1,Undetermined,Undetermined,,,2,3481209090,2735317061,113977736645,32.74,0.79
2,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,1,412230338,391068079,14902615287,36.15,0.95
2,ACC11927A2,405887,GTCCTTCGGC,CTGTGCATGA,2,1426951170,1332820556,51164851641,35.86,0.93
2,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,1,575810352,545939221,20810867373,36.14,0.95
2,ACC11927A5,405887,GTTTCACGAT,TTTGGCCGAA,2,1993189680,1896844852,72130263160,36.19,0.95
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[Settings]
BarcodeMismatchesIndex1,0
BarcodeMismatchesIndex2,0
[Data]
FCID,Lane,Sample_ID,SampleRef,index,index2,SampleName,Control,Recipe,Operator,Sample_Project
HY7FFDRX2,1,ACC11927A2,hg19,GTCCTTCGGC,CTGTGCATGA,anonymous_1,N,R1,script,405887
HY7FFDRX2,1,ACC11927A5,hg19,GTTTCACGAT,TTTGGCCGAA,anonymous_2,N,R1,script,405887
HY7FFDRX2,2,ACC11927A2,hg19,GTCCTTCGGC,CTGTGCATGA,anonymous_1,N,R1,script,405887
HY7FFDRX2,2,ACC11927A5,hg19,GTTTCACGAT,TTTGGCCGAA,anonymous_2,N,R1,script,405887
58 changes: 57 additions & 1 deletion tests/meta/demultiplex/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pytest

from collections import namedtuple
from datetime import datetime
from pathlib import Path
from typing import List
from typing import List, Dict

from cg.apps.cgstats.stats import StatsAPI
from cg.apps.housekeeper.hk import HousekeeperAPI
Expand All @@ -14,6 +15,8 @@
from cg.store.models import Sample, Family
from tests.store_helpers import StoreHelpers

# Bundles the fixture values describing one demultiplexed flow cell.
FlowCellInfo = namedtuple(
    "FlowCellInfo",
    ["directory", "name", "sample_internal_ids"],
)


@pytest.fixture(name="tmp_demulitplexing_dir")
def fixture_tmp_demulitplexing_dir(
Expand Down Expand Up @@ -339,6 +342,59 @@ def fixture_delete_demultiplex_api(
)


@pytest.fixture(name="flow_cell_info_map", scope="session")
def fixture_flow_cell_info_map(
    flow_cell_directory_name_demultiplexed_with_bcl_convert_flat,
    flow_cell_name_demultiplexed_with_bcl_convert,
    flow_cell_name_demultiplexed_with_bcl2fastq,
    bcl_convert_demultiplexed_flow_cell_sample_internal_ids,
    bcl2fastq_demultiplexed_flow_cell_sample_internal_ids,
    flow_cell_directory_name_demultiplexed_with_bcl_convert,
    flow_cell_directory_name_demultiplexed_with_bcl2fastq,
) -> Dict[str, FlowCellInfo]:
    """Return the flow cell fixtures grouped per demultiplexing software and setting.

    Each key is a string identifying one combination of demultiplexing software and
    setting. Each value is a FlowCellInfo named tuple holding the flow cell directory
    name, the flow cell name and the sample internal ids for that combination.
    """
    bcl2fastq_tree = FlowCellInfo(
        directory=flow_cell_directory_name_demultiplexed_with_bcl2fastq,
        name=flow_cell_name_demultiplexed_with_bcl2fastq,
        sample_internal_ids=bcl2fastq_demultiplexed_flow_cell_sample_internal_ids,
    )
    # Both Bcl Convert entries share the flow cell name and samples; only the
    # directory differs.
    bcl_convert_flat = FlowCellInfo(
        directory=flow_cell_directory_name_demultiplexed_with_bcl_convert_flat,
        name=flow_cell_name_demultiplexed_with_bcl_convert,
        sample_internal_ids=bcl_convert_demultiplexed_flow_cell_sample_internal_ids,
    )
    bcl_convert_tree = FlowCellInfo(
        directory=flow_cell_directory_name_demultiplexed_with_bcl_convert,
        name=flow_cell_name_demultiplexed_with_bcl_convert,
        sample_internal_ids=bcl_convert_demultiplexed_flow_cell_sample_internal_ids,
    )

    info_by_demux_type: Dict[str, FlowCellInfo] = {}
    info_by_demux_type["BCL2FASTQ_TREE"] = bcl2fastq_tree
    info_by_demux_type["BCLCONVERT_FLAT"] = bcl_convert_flat
    info_by_demux_type["BCLCONVERT_TREE"] = bcl_convert_tree
    return info_by_demux_type


@pytest.fixture(name="flow_cell_name_demultiplexed_with_bcl_convert", scope="session")
def flow_cell_name_demultiplexed_with_bcl_convert() -> str:
    """Return the name of the flow cell that was demultiplexed with Bcl Convert."""
    flow_cell_name: str = "HY7FFDRX2"
    return flow_cell_name


@pytest.fixture(name="flow_cell_directory_name_demultiplexed_with_bcl_convert", scope="session")
def fixture_flow_cell_directory_name_demultiplexed_with_bcl_convert(
    flow_cell_name_demultiplexed_with_bcl_convert: str,
) -> str:
    """Return the directory name of the flow cell demultiplexed with Bcl Convert.

    The directory name is a fixed run prefix followed by the flow cell name.
    """
    return f"230504_A00689_0804_B{flow_cell_name_demultiplexed_with_bcl_convert}"


@pytest.fixture(
    name="flow_cell_directory_name_demultiplexed_with_bcl_convert_flat", scope="session"
)
def fixture_flow_cell_directory_name_demultiplexed_with_bcl_convert_flat(
    flow_cell_name_demultiplexed_with_bcl_convert: str,
) -> str:
    """Return the directory name of a flow cell demultiplexed with Bcl Convert (flat output).

    This is the flow cell directory that was demultiplexed using a flat output directory
    structure. The directory name is a fixed run prefix followed by the flow cell name.
    """
    return f"230505_A00689_0804_B{flow_cell_name_demultiplexed_with_bcl_convert}"


@pytest.fixture(name="demultiplexing_init_files")
def tmp_demultiplexing_init_files(
bcl2fastq_flow_cell_id: str, populated_delete_demultiplex_api: DeleteDemuxAPI
Expand Down
123 changes: 38 additions & 85 deletions tests/meta/demultiplex/test_demux_post_processing.py
seallard marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import logging
import os
from pathlib import Path
from typing import Generator, List
from typing import Dict, Generator, List

import pytest


from cg.constants.demultiplexing import BclConverter, DemultiplexingDirsAndFiles
Expand All @@ -15,6 +18,8 @@
from cg.models.demultiplex.flow_cell import FlowCellDirectoryData
from cg.store import Store

from tests.meta.demultiplex.conftest import FlowCellInfo


def test_set_dry_run(
demultiplex_context: CGConfig,
Expand Down Expand Up @@ -485,131 +490,79 @@ def test_finish_all_flowcells(
assert f"Check demultiplexed flow cell {bcl2fastq_flow_cell.full_name}" in caplog.text


def test_post_processing_of_flow_cell_demultiplexed_with_bclconvert(
@pytest.mark.parametrize(
"demux_type",
["BCL2FASTQ_TREE", "BCLCONVERT_TREE", "BCLCONVERT_FLAT"],
)
def test_post_processing_of_flow_cell(
demux_type: str,
demultiplex_context: CGConfig,
flow_cell_directory_name_demultiplexed_with_bcl_convert: str,
flow_cell_name_demultiplexed_with_bcl_convert: str,
flow_cell_info_map: Dict[str, FlowCellInfo],
demultiplexed_flow_cells_tmp_directory: Path,
bcl_convert_demultiplexed_flow_cell_sample_internal_ids: List[str],
novaseq_6000_dir: Path,
):
"""Test adding a demultiplexed flow cell to the databases with. Runs on each type of
demultiplexing software and setting used."""

# GIVEN a demultiplexed flow cell
flow_cell_demultplexing_directory: str = flow_cell_info_map.get(demux_type).directory
flow_cell_name: str = flow_cell_info_map.get(demux_type).name
sample_internal_ids: List[str] = flow_cell_info_map.get(demux_type).sample_internal_ids

# GIVEN a DemuxPostProcessing API
demux_post_processing_api = DemuxPostProcessingAPI(demultiplex_context)

# GIVEN a directory with the demultiplexed flow cell
demux_post_processing_api.demux_api.out_dir = demultiplexed_flow_cells_tmp_directory
demux_post_processing_api.demux_api.run_dir = novaseq_6000_dir

# WHEN post processing the demultiplexed flow cell
demux_post_processing_api.finish_flow_cell_temp(
flow_cell_directory_name_demultiplexed_with_bcl_convert
)

# THEN a flow cell was created in statusdb
assert demux_post_processing_api.status_db.get_flow_cell_by_name(
flow_cell_name_demultiplexed_with_bcl_convert
)

# THEN sequencing metrics were created for the flow cell
assert demux_post_processing_api.status_db.get_sample_lane_sequencing_metrics_by_flow_cell_name(
flow_cell_name=flow_cell_name_demultiplexed_with_bcl_convert
# GIVEN that a sample sheet exists in the flow cell run directory
path = Path(
demux_post_processing_api.demux_api.run_dir,
flow_cell_demultplexing_directory,
DemultiplexingDirsAndFiles.SAMPLE_SHEET_FILE_NAME,
)
# THEN the read count was calculated for all samples in the flow cell directory
for sample_id in bcl_convert_demultiplexed_flow_cell_sample_internal_ids:
sample = demux_post_processing_api.status_db.get_sample_by_internal_id(sample_id)
assert sample is not None
assert sample.calculated_read_count

# THEN a bundle was added to Housekeeper for the flow cell
assert demux_post_processing_api.hk_api.bundle(flow_cell_name_demultiplexed_with_bcl_convert)

# THEN a bundle was added to Housekeeper for each sample
for sample_id in bcl_convert_demultiplexed_flow_cell_sample_internal_ids:
assert demux_post_processing_api.hk_api.bundle(sample_id)

# THEN a sample sheet was added to Housekeeper
assert demux_post_processing_api.hk_api.get_files(
tags=[SequencingFileTag.SAMPLE_SHEET],
bundle=flow_cell_name_demultiplexed_with_bcl_convert,
).all()

# THEN sample fastq files were added to Housekeeper tagged with FASTQ and the flow cell name
for sample_id in bcl_convert_demultiplexed_flow_cell_sample_internal_ids:
assert demux_post_processing_api.hk_api.get_files(
tags=[SequencingFileTag.FASTQ, flow_cell_name_demultiplexed_with_bcl_convert],
bundle=sample_id,
).all()

# THEN a delivery file was created in the flow cell directory
delivery_path = Path(
demux_post_processing_api.demux_api.out_dir,
flow_cell_directory_name_demultiplexed_with_bcl_convert,
DemultiplexingDirsAndFiles.DELIVERY,
)

assert delivery_path.exists()


def test_post_processing_of_flow_cell_demultiplexed_with_bcl2fastq(
demultiplex_context: CGConfig,
flow_cell_directory_name_demultiplexed_with_bcl2fastq: str,
flow_cell_name_demultiplexed_with_bcl2fastq: str,
demultiplexed_flow_cells_tmp_directory: Path,
hiseq_dir: Path,
bcl2fastq_demultiplexed_flow_cell_sample_internal_ids: List[str],
):
# GIVEN a DemuxPostProcessing API
demux_post_processing_api = DemuxPostProcessingAPI(demultiplex_context)

# GIVEN a directory with a flow cell demultiplexed with bcl2fastq
demux_post_processing_api.demux_api.out_dir = demultiplexed_flow_cells_tmp_directory
demux_post_processing_api.demux_api.run_dir = hiseq_dir
os.makedirs(path.parent, exist_ok=True)
path.touch()

# WHEN post processing the demultiplexed flow cell
demux_post_processing_api.finish_flow_cell_temp(
flow_cell_directory_name_demultiplexed_with_bcl2fastq
)
demux_post_processing_api.finish_flow_cell_temp(flow_cell_demultplexing_directory)

# THEN a flow cell was created in statusdb
assert demux_post_processing_api.status_db.get_flow_cell_by_name(
flow_cell_name_demultiplexed_with_bcl2fastq
)
assert demux_post_processing_api.status_db.get_flow_cell_by_name(flow_cell_name)

# THEN sequencing metrics were created for the flow cell
assert demux_post_processing_api.status_db.get_sample_lane_sequencing_metrics_by_flow_cell_name(
flow_cell_name=flow_cell_name_demultiplexed_with_bcl2fastq
flow_cell_name=flow_cell_name
)

# THEN the read count was calculated for all samples in the flow cell directory
for sample_internal_id in bcl2fastq_demultiplexed_flow_cell_sample_internal_ids:
for sample_internal_id in sample_internal_ids:
sample = demux_post_processing_api.status_db.get_sample_by_internal_id(sample_internal_id)
assert sample is not None
assert sample.calculated_read_count

# THEN a bundle was added to Housekeeper for the flow cell
assert demux_post_processing_api.hk_api.bundle(flow_cell_name_demultiplexed_with_bcl2fastq)
assert demux_post_processing_api.hk_api.bundle(flow_cell_name)

# THEN a bundle was added to Housekeeper for each sample
for sample_internal_id in bcl2fastq_demultiplexed_flow_cell_sample_internal_ids:
for sample_internal_id in sample_internal_ids:
assert demux_post_processing_api.hk_api.bundle(sample_internal_id)

# THEN a sample sheet was added to Housekeeper
assert demux_post_processing_api.hk_api.get_files(
tags=[SequencingFileTag.SAMPLE_SHEET],
bundle=flow_cell_name_demultiplexed_with_bcl2fastq,
bundle=flow_cell_name,
).all()

# THEN sample fastq files were added to Housekeeper tagged with FASTQ and the flow cell name
for sample_internal_id in bcl2fastq_demultiplexed_flow_cell_sample_internal_ids:
for sample_internal_id in sample_internal_ids:
assert demux_post_processing_api.hk_api.get_files(
tags=[SequencingFileTag.FASTQ, flow_cell_name_demultiplexed_with_bcl2fastq],
tags=[SequencingFileTag.FASTQ, flow_cell_name],
bundle=sample_internal_id,
).all()

# THEN a delivery file was created in the flow cell directory
delivery_path = Path(
demux_post_processing_api.demux_api.out_dir,
flow_cell_directory_name_demultiplexed_with_bcl2fastq,
flow_cell_demultplexing_directory,
DemultiplexingDirsAndFiles.DELIVERY,
)

Expand Down
Loading