Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
maximilianmordig committed Mar 2, 2024
1 parent f71e34d commit 5e8e1ad
Show file tree
Hide file tree
Showing 105 changed files with 4,385 additions and 1,432 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/build_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
packages: write
contents: read
runs-on: ubuntu-latest
timeout-minutes: 45
timeout-minutes: 60
steps:
-
name: Checkout
Expand Down Expand Up @@ -83,6 +83,7 @@ jobs:
-
name: Test docker image in python3.10
uses: addnab/docker-run-action@v3
timeout-minutes: 20
with:
image: ${{ env.TEST_TAG }}
shell: /bin/bash
Expand All @@ -97,6 +98,7 @@ jobs:
-
name: Push docker image
uses: docker/build-push-action@v4
timeout-minutes: 20
id: Push
with:
context: .
Expand All @@ -108,6 +110,7 @@ jobs:
-
name: Run full usecase (in Docker container)
uses: addnab/docker-run-action@v3
timeout-minutes: 20
with:
image: ${{ env.TEST_TAG }}
shell: /bin/bash
Expand All @@ -133,6 +136,7 @@ jobs:
-
name: Archive figures
uses: actions/upload-artifact@v3
timeout-minutes: 10
with:
name: usecase-figures
path: figures.tar.gz
48 changes: 43 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,61 @@

{
"name": "Python: Current File",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${file}",

// "program": "${file}",

"program": "/home/mmordig/ont_project_all/ont_project/usecases/enrich_usecase.py",
"cwd": "/home/mmordig/ont_project_all/ont_project/runs/enrich_usecase/full_genome_run_sampler_per_window",

"console": "integratedTerminal",
// "justMyCode": true
"justMyCode": false // to debug external library code
},
{
"name": "Python: Attach to python process",
"type": "python",
"type": "debugpy",
"request": "attach",
"processId": "${command:pickProcess}",
"processId": "${command:pickProcess}", // ctrl+Z, fg to get pid; requires "echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope" on Linux, see https://code.visualstudio.com/docs/python/debugging, may need to launch program from within vscode
// "logToFile": true, // in case it fails
"justMyCode": false
}
},
// {
// // "host": "compute-biomed-01",
// ""
// }

{
"name": "Python: enrich usecase",
"type": "debugpy",
"request": "launch",
"python": "/home/mmordig/miniforge3/envs/nanosim/bin/python",

// (cd /home/mmordig/ont_project_all/ont_project/runs/enrich_usecase/chr202122_run && python ~/ont_project_all/ont_project/usecases/enrich_usecase.py)

"program": "/home/mmordig/ont_project_all/ont_project/usecases/enrich_usecase.py",
// "cwd": "/home/mmordig/ont_project_all/ont_project/runs/enrich_usecase/chr202122_run",
"cwd": "/home/mmordig/ont_project_all/ont_project/runs/enrich_usecase/readfish_exp/results_readfishexp_realreads",

"console": "integratedTerminal",
// "justMyCode": true
"justMyCode": false // to debug external library code
},

{
"name": "Python: debug nanosim",
"type": "debugpy",
"request": "launch",

"python": "/home/mmordig/miniforge3/envs/nanosim/bin/python",
"program": "external/ont_nanosim/src/simulator.py",
"args": ["genome", "--model_prefix", "runs/nanosim_models/human_NA12878_DNA_FAB49712_guppy/training", "--ref_g", "runs/data/random_genome.fasta", "-dna_type", "linear", "-med", "15000", "-max", "20000", "-min", "400", "-sd", "6.9", "--output", "runs/data/nanosim_reads/human_genome_med15000/reads_seed3", "--number", "100000", "--seed", "3", "--strandness", "0.5", "--basecaller", "guppy", "--aligned_rate", "100%", "--num_threads", "1", "--no_flanking", "--no_error_profile"],
"cwd": "/home/mmordig/ont_project_all/ont_project/",

"console": "integratedTerminal",
// "justMyCode": true
"justMyCode": false // to debug external library code
},
]
}
28 changes: 28 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "shell",
"label": "rsync to mpi",
"command": [
// "rsync -avzh --exclude /ont_project/runs --exclude /ont_project/usecase_data.tar.gz --exclude /ont_project/.tox/ --exclude /ont_project/.git/ --exclude /ont_project/external/ont_nanosim --progress --delete ~/ont_project_all/ont_project mpi:/home/mmordig/ont_project_all &&",
// also syncing nanosim
"rsync -avzh --exclude /ont_project/runs --exclude /ont_project/usecase_data.tar.gz --exclude /ont_project/.tox/ --exclude /ont_project/.git/ --progress --delete ~/ont_project_all/ont_project mpi:/home/mmordig/ont_project_all &&",

// sync to biomed
// "rsync -avzh --exclude /ont_project/runs --exclude /ont_project/usecase_data.tar.gz --exclude /ont_project/.tox/ --exclude /ont_project/.git/ --exclude /ont_project/external/ont_nanosim --progress --delete ~/ont_project_all/ont_project biomed:/cluster/home/mmordig/ont_project_all &&",
// // sync figures back from biomed
// "rsync -avzh --progress --include='*/' --include '**/figures/*.png' --include '**/configs/*' --include '**/pickled_figures/*.dill' --exclude '*' --delete biomed:/cluster/work/grlab/projects/mmordig/selseq_runs/ ~/ont_project_all/figures_biomed_cluster &&",

"echo Current time: $(date)"
],
"problemMatcher": [],
// in keybindings.json
// {
// "key": "cmd+m cmd+p",
// "command": "workbench.action.tasks.runTask",
// "args": "rsync to mpi"
// }
}
]
}
11 changes: 6 additions & 5 deletions DeveloperNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ This is only applicable if you want to develop the package.

After changing the package entrypoints, you have to reinstall the package with
```{bash}
# test it with `python -c "import ru"`.
pip uninstall -y simreaduntil; pip install -e './[test,readfish,dev]'
# need to reinstall readfish to use our modified version
# Hatch does not support installing dependencies like readfish in editable mode, so we install it manually with "-e".
# ReadFish imports its own files with `ru.*`, so it assumes that the ReadFish directory is in the `PYTHONPATH`.
pip uninstall -y readfish; pip install -e ./external/ont_readfish
```
This is also necessary when modifying the readfish dependency because it cannot easily be installed in editable mode with hatch: https://github.com/pypa/hatch/issues/588.

Expand Down Expand Up @@ -38,11 +44,6 @@ python -m pytest --cov=. tests/simulator/gap_sampling/test_gap_sampling.py::test
pydoctor "./src/simreaduntil"
# can also put one file to just compile it
# manually install ReadFish
# ReadFish imports its own files with `ru.*`, so it assumes that the ReadFish directory is in the `PYTHONPATH`.
pip install -e ./external/ont_readfish
# test it with `python -c "import ru"`.
git submodule add <url> [<dir>]
git config --add oh-my-zsh.hide-dirty 1 # otherwise cd into NanoSim directory is slow
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ readfish = [
"read-until @ git+https://github.com/nanoporetech/[email protected]",
"readfish @ {root:uri}/external/ont_readfish",
]
rawsignal = [
"pyslow5",
]

[project.scripts]
plot_seqsum = "simreaduntil.seqsum_tools.seqsum_plotting:main"
Expand All @@ -91,7 +94,7 @@ usecase_make_html_report = "simreaduntil.usecase_helpers.cli_usecase.make_html_r

[tool.hatch.version]
source = "vcs"
fallback-version = "unknown_version"
fallback-version = "0.0.0.7999" # dummy version to recognize it

[tool.hatch.metadata]
# to install from git
Expand Down
Binary file modified simulator_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 11 additions & 1 deletion src/simreaduntil/seqsum_tools/coverage_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def get_chrom_start_len(self, read_id) -> Optional[Tuple[Any, int, int]]:
read_id: a NanoSim read id
Returns:
Tuple (chrom, ref_start, ref_len) with respect to forward strand; None
Tuple (chrom, ref_start, ref_len) with respect to forward strand; None if could not be mapped
"""
raise NotImplementedError()

Expand Down Expand Up @@ -255,6 +255,9 @@ def get_fraction_cov_atleast(self, threshold, chroms: Optional[List]=None) -> fl
"""
if chroms is None:
chroms = list(self.coverage_per_chrom.keys())
if len(chroms) == 0:
logger.warning("get_fraction_cov_atleast called with empty chroms, returning 1.0")
return 1.0
return sum((self.coverage_per_chrom[chrom] >= threshold).sum(dtype=np.uint64) for chrom in chroms) / sum(len(self.coverage_per_chrom[chrom]) for chrom in chroms)

def get_chrom_lens(self) -> Dict[str, int]:
Expand Down Expand Up @@ -503,6 +506,9 @@ def get_fraction_cov_atleast(self, threshold, chroms: Optional[List]=None) -> fl
"""
if chroms is None:
chroms = list(self.coverage_per_chrom.keys())
if len(chroms) == 0:
logger.warning("get_fraction_cov_atleast called with empty chroms, returning 1.0")
return 1.0
# last block can be shorter, so we also have to weight it differently
return sum(((self._avg_cov_per_block(chrom) >= threshold) * self._block_sizes(chrom)).sum(dtype=np.uint64) for chrom in chroms) / sum(self.chrom_lens[chrom] for chrom in chroms)

Expand Down Expand Up @@ -548,9 +554,13 @@ def plot_state(self, plot_type, target_coverage=None, **kwargs):
class NanoSimCoverageTracker(CovTrackerClass):
    """
    Coverage tracker that takes each read's reference location directly from its NanoSim read id.

    Reads whose NanoSim read type is "unaligned" carry no usable location and are reported as unmapped.
    """
    def get_chrom_start_len(self, read_id):
        """
        Extract the reference location encoded in a NanoSim read id.

        Args:
            read_id: a NanoSim read id

        Returns:
            Tuple (chrom, ref_pos, ref_len) parsed from the id; None if the read type is "unaligned"
        """
        parsed_id = NanoSimId.from_str(read_id)
        is_unaligned = parsed_id.read_type == "unaligned"
        # location fields are only read for reads that are not unaligned
        return None if is_unaligned else (parsed_id.chrom, parsed_id.ref_pos, parsed_id.ref_len)

class PafCoverageTracker(CovTrackerClass):
Expand Down
Loading

0 comments on commit 5e8e1ad

Please sign in to comment.