Skip to content

Commit

Permalink
Merge pull request #42 from HadrienG/review_ju
Browse files Browse the repository at this point in the history
0.8.1
  • Loading branch information
HadrienG authored Nov 13, 2017
2 parents 9a5091c + 0bbfc03 commit c827aed
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 25 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ To install InSilicoSeq, type the following in your terminal:
Alternatively, with docker:

```shell
docker pull hadrieng/insilicoseq:0.8.0
docker pull hadrieng/insilicoseq:0.8.1
```

## Usage
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# The short X.Y version.
version = '0.8'
# The full version, including alpha/beta/rc tags.
release = '0.8.0'
release = '0.8.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion doc/iss/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ If you wish to use InSilicoSeq using docker

.. code-block:: bash
docker pull hadrieng/insilicoseq:0.8.0
docker pull hadrieng/insilicoseq:0.8.1
To use InSilicoSeq with docker, you need to provide a `volume` to the
``docker run`` command. Given with the ``-v`` option, the volume is your way
Expand Down
20 changes: 20 additions & 0 deletions iss/abundance.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,23 @@ def to_coverage(total_n_reads, species_abundance, read_length, genome_size):
n_reads = total_n_reads * species_abundance
coverage = (n_reads * read_length) / genome_size
return coverage


def to_file(abundance_dic, output):
    """Write the abundance dictionary to a tab-separated text file.

    The file is named ``<output>_abundance.txt``. One line is written per
    dictionary entry: the key, a tab character, then the value.

    Args:
        abundance_dic (dict): the abundance dictionary
        output (str): the output file name (used as a prefix)

    Raises:
        SystemExit: with exit code 1 if the output file cannot be opened
    """
    logger = logging.getLogger(__name__)
    output_abundance = output + '_abundance.txt'
    try:
        f = open(output_abundance, 'w')
    except OSError as e:
        # OSError is the parent of PermissionError, so the original case is
        # still handled; it additionally covers a missing parent directory
        # or a path that is a directory, which previously escaped as a
        # traceback instead of a logged error.
        logger.error('Failed to open output file: %s' % e)
        sys.exit(1)
    else:
        # the file object is its own context manager: closed on exit
        with f:
            for record, abundance in abundance_dic.items():
                f.write('%s\t%s\n' % (record, abundance))
40 changes: 23 additions & 17 deletions iss/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ def generate_reads(args):
npz = args.model
err_mod = kde.KDErrorModel(npz)
elif args.mode == 'basic':
if args.model is not None:
logger.warning(
'--model %s will be ignored in --mode %s' %
(args.model, args.mode)
)
from iss.error_models import basic
err_mod = basic.BasicErrorModel()
except ImportError as e:
Expand All @@ -62,6 +67,7 @@ def generate_reads(args):
if args.genomes:
genome_file = args.genomes
elif args.ncbi and args.n_genomes:
util.genome_file_exists(args.output + '_genomes.fasta')
genomes = download.ncbi(args.ncbi, args.n_genomes)
genome_file = download.to_fasta(genomes, args.output)
else:
Expand All @@ -79,25 +85,21 @@ def generate_reads(args):
logger.error('Genome(s) file seems empty: %s' % genome_file)
sys.exit(1)
else:
abundance_dispatch = {
'uniform': abundance.uniform,
'halfnormal': abundance.halfnormal,
'exponential': abundance.exponential,
'lognormal': abundance.lognormal,
'zero_inflated_lognormal': abundance.zero_inflated_lognormal
}
# read the abundance file
if args.abundance_file:
logger.info('Using abundance file:%s' % args.abundance_file)
abundance_dic = abundance.parse_abundance_file(args.abundance_file)
elif args.abundance == 'uniform':
elif args.abundance in abundance_dispatch:
logger.info('Using %s abundance distribution' % args.abundance)
abundance_dic = abundance.uniform(record_list)
elif args.abundance == 'halfnormal':
logger.info('Using %s abundance distribution' % args.abundance)
abundance_dic = abundance.halfnormal(record_list)
elif args.abundance == 'exponential':
logger.info('Using %s abundance distribution' % args.abundance)
abundance_dic = abundance.exponential(record_list)
elif args.abundance == 'lognormal':
logger.info('Using %s abundance distribution' % args.abundance)
abundance_dic = abundance.lognormal(record_list)
elif args.abundance == 'zero_inflated_lognormal':
logger.info('Using %s abundance distribution' % args.abundance)
abundance_dic = abundance.zero_inflated_lognormal(record_list)
abundance_dic = abundance_dispatch[args.abundance](record_list)
abundance.to_file(abundance_dic, args.output)
else:
logger.error('Could not get abundance')
sys.exit(1)
Expand Down Expand Up @@ -148,8 +150,12 @@ def generate_reads(args):
logger.error('iss generate interrupted: %s' % e)
generator.cleanup(temp_file_list)
else:
generator.concatenate(temp_file_list, args.output)
generator.cleanup(temp_file_list)
# remove the duplicates in file list and cleanup
# we remove the duplicates in case two records had the same header
# and reads were appended to the same temp file.
temp_file_unique = list(set(temp_file_list))
generator.concatenate(temp_file_unique, args.output)
generator.cleanup(temp_file_unique)
logger.info('Read generation complete')


Expand Down Expand Up @@ -276,7 +282,7 @@ def main():
'--n_reads',
'-n',
metavar='<int>',
default=1000000,
default='1000000',
help='Number of reads to generate (default: %(default)s). Allows \
suffixes k, K, m, M, g and G (ex 0.5M for 500000).'
)
Expand Down
9 changes: 7 additions & 2 deletions iss/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,5 +194,10 @@ def cleanup(file_list):
logger.info('Cleaning up')
for temp_file in file_list:
if temp_file is not None:
os.remove(temp_file + '_R1.fastq')
os.remove(temp_file + '_R2.fastq')
try:
os.remove(temp_file + '_R1.fastq')
os.remove(temp_file + '_R2.fastq')
except FileNotFoundError as e:
logger.error('Temporary file not found: %s' % temp_file)
logger.error('You may have to remove temporary files manually')
sys.exit(1)
12 changes: 12 additions & 0 deletions iss/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from Bio import SeqIO

import os
import sys
import logging
import numpy as np
Expand Down Expand Up @@ -132,3 +133,14 @@ def convert_n_reads(unit):
logger.error('%s is not a valid number of reads' % unit)
sys.exit(1)
return unit_int


def genome_file_exists(filename):
    """Abort the program if `filename` already exists.

    Returns normally (None) when nothing exists at `filename`. Otherwise
    logs an error with suggested alternatives and exits.

    Args:
        filename (str): path to check

    Raises:
        SystemExit: with exit code 1 if `filename` already exists
    """
    logger = logging.getLogger(__name__)
    # Explicit check rather than `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this guard.
    if os.path.exists(filename):
        logger.error('%s already exists. Aborting.' % filename)
        logger.error('Maybe use --genomes %s' % filename)
        logger.error('or use --ncbi with another output prefix')
        sys.exit(1)
2 changes: 1 addition & 1 deletion iss/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.8.0'
__version__ = '0.8.1'
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

setup(
name='InSilicoSeq',
version='0.8.0',
version='0.8.1',

description='a sequencing simulator',

url='https://github.com/HadrienG/InSilicoSeq',
download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.0',
download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.1',
author='Hadrien Gourlé',
author_email='[email protected]',

Expand Down

0 comments on commit c827aed

Please sign in to comment.