Skip to content

Commit

Permalink
Merge pull request #125 from HadrienG/dev
Browse files Browse the repository at this point in the history
1.4.4
  • Loading branch information
HadrienG authored Oct 23, 2019
2 parents 00da5ba + 681ee96 commit 9b65a3b
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
uses: codecov/[email protected]
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: .coverage
file: coverage.xml
- name: Test install
run: |
pip install -e .
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ build/
# Unit test / coverage reports
.cache
.coverage
coverage.xml

# files needed for tests
!SRR1660402_mapped.bam
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ pycodestyle = "*"

[scripts]
iss = "python -m iss"
tests = "nosetests --with-coverage --cover-package=iss"
tests = "nosetests --with-coverage --cover-package=iss --cover-xml"
73 changes: 39 additions & 34 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 38 additions & 6 deletions iss/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from iss.version import __version__

from Bio import SeqIO
from joblib import Parallel, delayed
from joblib import Parallel, delayed, load, dump

import gc
import os
import sys
import pickle
import random
import logging
import argparse
Expand Down Expand Up @@ -209,6 +211,9 @@ def generate_reads(args):
n_pairs = int(round(
(coverage *
len(record.seq)) / err_mod.read_length) / 2)
# skip record if n_pairs == 0
if n_pairs == 0:
continue

# exact n_reads for each cpus
if n_pairs % cpus == 0:
Expand All @@ -219,11 +224,34 @@ def generate_reads(args):
for _ in range(cpus)]
n_pairs_per_cpu[-1] += n_pairs % cpus

record_file_name_list = Parallel(n_jobs=cpus)(
delayed(generator.reads)(
record, err_mod,
n_pairs_per_cpu[i], i, args.output, args.seed,
args.gc_bias) for i in range(cpus))
# due to a bug in multiprocessing
# https://bugs.python.org/issue17560
# we can't send records taking more than 2**31 bytes
# through serialisation.
# In those cases we use memmapping
if sys.getsizeof(str(record.seq)) >= 2**31 - 1:
logger.warning(
"record %s unusually big." % record.id)
logger.warning("Using a memory map.")
mode = "memmap"

record_mmap = "%s.memmap" % args.output
if os.path.exists(record_mmap):
os.unlink(record_mmap)
util.dump(record, record_mmap)
del record
record = record_mmap
gc.collect()
else:
mode = "default"

record_file_name_list = Parallel(
n_jobs=cpus)(
delayed(generator.reads)(
record, err_mod,
n_pairs_per_cpu[i], i, args.output,
args.seed,
args.gc_bias, mode) for i in range(cpus))
temp_file_list.extend(record_file_name_list)
except KeyboardInterrupt as e:
logger.error('iss generate interrupted: %s' % e)
Expand All @@ -232,6 +260,8 @@ def generate_reads(args):
temp_R2 = [temp_file + '_R2.fastq' for temp_file in temp_file_list]
full_tmp_list = temp_R1 + temp_R2
full_tmp_list.append(genome_file)
if os.path.exists("%s.memmap" % args.output):
full_tmp_list.append("%s.memmap" % args.output)
util.cleanup(full_tmp_list)
sys.exit(1)
else:
Expand All @@ -245,6 +275,8 @@ def generate_reads(args):
util.concatenate(temp_R2, args.output + '_R2.fastq')
full_tmp_list = temp_R1 + temp_R2
full_tmp_list.append(genome_file)
if os.path.exists("%s.memmap" % args.output):
full_tmp_list.append("%s.memmap" % args.output)
util.cleanup(full_tmp_list)
if args.compress:
util.compress(args.output + '_R1.fastq')
Expand Down
10 changes: 8 additions & 2 deletions iss/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from __future__ import division, unicode_literals
from builtins import range

from iss.util import rev_comp
from iss.util import load, rev_comp

from Bio import SeqIO
from Bio.Seq import Seq
Expand All @@ -21,7 +21,7 @@


def reads(record, ErrorModel, n_pairs, cpu_number, output, seed,
gc_bias=False):
gc_bias=False, mode="default"):
"""Simulate reads from one genome (or sequence) according to an ErrorModel
This function makes use of the `simulate_read` function to simulate reads
Expand All @@ -42,6 +42,12 @@ def reads(record, ErrorModel, n_pairs, cpu_number, output, seed,
str: the name of the output file
"""
logger = logging.getLogger(__name__)

# load the record from disk if mode is memmap
if mode == "memmap":
record_mmap = load(record)
record = record_mmap

if seed is not None:
random.seed(seed + cpu_number)
np.random.seed(seed + cpu_number)
Expand Down
37 changes: 37 additions & 0 deletions iss/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import sys
import gzip
import pickle
import random
import logging
import numpy as np
Expand Down Expand Up @@ -247,3 +248,39 @@ def compress(filename):
with open(filename, 'rb') as i, gzip.open(outfile, 'wb') as o:
copyfileobj(i, o)
return outfile


def dump(object, output):
    """Pickle an object to disk, writing the payload in bounded chunks.

    The object is serialised in memory with ``pickle.dumps`` (highest
    protocol) and then written to ``output`` in pieces of at most
    ``2**31 - 1`` bytes, so that no single write call exceeds that size.

    Args:
        object (object): a picklable python object
        output (string): path of the file to write; it is overwritten
            if it already exists
    """
    max_bytes = 2**31 - 1
    pickled_object = pickle.dumps(object, protocol=pickle.HIGHEST_PROTOCOL)
    # A memoryview lets us slice the payload without copying each chunk,
    # which matters when the pickle is several gigabytes.
    payload = memoryview(pickled_object)
    # len() is the real payload length; sys.getsizeof() would also count
    # the bytes object's header and is implementation-specific.
    size = len(payload)

    with open(output, 'wb') as out_file:
        for start in range(0, size, max_bytes):
            out_file.write(payload[start:start + max_bytes])

def load(filename):
    """Read a pickled object back from disk in bounded chunks.

    The file is read in pieces of at most ``2**31 - 1`` bytes which are
    accumulated in a buffer; the buffer is then handed to ``pickle.loads``.

    NOTE: unpickling executes arbitrary code embedded in the file, so
    this must only be used on files the program wrote itself.

    Args:
        filename (string): the path of the pickle to load

    Returns:
        object: the unpickled python object
    """
    chunk_limit = 2**31 - 1
    remaining = os.path.getsize(filename)
    buffer = bytearray()

    with open(filename, 'rb') as handle:
        # one read per chunk_limit-sized slice of the file
        while remaining > 0:
            buffer += handle.read(chunk_limit)
            remaining -= chunk_limit
        result = pickle.loads(buffer)

    return result
2 changes: 1 addition & 1 deletion iss/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.4.3'
__version__ = '1.4.4'

0 comments on commit 9b65a3b

Please sign in to comment.