Merge pull request #42 from HadrienG/review_ju

0.8.1
HadrienG · Nov 13, 2017 · c827aed · c827aed
2 parents 9a5091c + 0bbfc03
commit c827aed
Show file tree

Hide file tree

Showing 9 changed files with 68 additions and 25 deletions.
diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ To install InSilicoSeq, type the following in your terminal:
 Alternatively, with docker:
 
 ```shell
-docker pull hadrieng/insilicoseq:0.8.0
+docker pull hadrieng/insilicoseq:0.8.1
 ```
 
 ## Usage

diff --git a/doc/conf.py b/doc/conf.py
@@ -60,7 +60,7 @@
 # The short X.Y version.
 version = '0.8'
 # The full version, including alpha/beta/rc tags.
-release = '0.8.0'
+release = '0.8.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/doc/iss/install.rst b/doc/iss/install.rst
@@ -51,7 +51,7 @@ If you wish to use InSilicoSeq using docker
 
 .. code-block:: bash
 
-    docker pull hadrieng/insilicoseq:0.8.0
+    docker pull hadrieng/insilicoseq:0.8.1
 
 To use InSilicoSeq with docker, you need to provide a `volume` to the
 ``docker run`` command. Given with the ``-v`` option, the volume is your way

diff --git a/iss/abundance.py b/iss/abundance.py
@@ -162,3 +162,23 @@ def to_coverage(total_n_reads, species_abundance, read_length, genome_size):
     n_reads = total_n_reads * species_abundance
     coverage = (n_reads * read_length) / genome_size
     return coverage
+
+
+def to_file(abundance_dic, output):
+    """Write the abundance dictionary to <output>_abundance.txt
+
+    One tab-separated line per record; exits (status 1) if open fails.
+
+    Args:
+        abundance_dic (dict): the abundance dictionary
+        output (str): output prefix, '_abundance.txt' is appended to it
+    """
+    logger = logging.getLogger(__name__)
+    output_abundance = output + '_abundance.txt'
+    try:
+        f = open(output_abundance, 'w')
+    except OSError as e:  # PermissionError, missing directory, ...
+        logger.error('Failed to open output file: %s' % e)
+        sys.exit(1)
+    with f:
+        f.writelines('%s\t%s\n' % kv for kv in abundance_dic.items())
diff --git a/iss/app.py b/iss/app.py
@@ -52,6 +52,11 @@ def generate_reads(args):
                 npz = args.model
             err_mod = kde.KDErrorModel(npz)
         elif args.mode == 'basic':
+            if args.model is not None:
+                logger.warning(
+                    '--model %s will be ignored in --mode %s' %
+                    (args.model, args.mode)
+                )
             from iss.error_models import basic
             err_mod = basic.BasicErrorModel()
     except ImportError as e:
@@ -62,6 +67,7 @@ def generate_reads(args):
         if args.genomes:
             genome_file = args.genomes
         elif args.ncbi and args.n_genomes:
+            util.genome_file_exists(args.output + '_genomes.fasta')
             genomes = download.ncbi(args.ncbi, args.n_genomes)
             genome_file = download.to_fasta(genomes, args.output)
         else:
@@ -79,25 +85,21 @@ def generate_reads(args):
         logger.error('Genome(s) file seems empty: %s' % genome_file)
         sys.exit(1)
     else:
+        abundance_dispatch = {
+            'uniform': abundance.uniform,
+            'halfnormal': abundance.halfnormal,
+            'exponential': abundance.exponential,
+            'lognormal': abundance.lognormal,
+            'zero_inflated_lognormal': abundance.zero_inflated_lognormal
+        }
         # read the abundance file
         if args.abundance_file:
             logger.info('Using abundance file:%s' % args.abundance_file)
             abundance_dic = abundance.parse_abundance_file(args.abundance_file)
-        elif args.abundance == 'uniform':
+        elif args.abundance in abundance_dispatch:
             logger.info('Using %s abundance distribution' % args.abundance)
-            abundance_dic = abundance.uniform(record_list)
-        elif args.abundance == 'halfnormal':
-            logger.info('Using %s abundance distribution' % args.abundance)
-            abundance_dic = abundance.halfnormal(record_list)
-        elif args.abundance == 'exponential':
-            logger.info('Using %s abundance distribution' % args.abundance)
-            abundance_dic = abundance.exponential(record_list)
-        elif args.abundance == 'lognormal':
-            logger.info('Using %s abundance distribution' % args.abundance)
-            abundance_dic = abundance.lognormal(record_list)
-        elif args.abundance == 'zero_inflated_lognormal':
-            logger.info('Using %s abundance distribution' % args.abundance)
-            abundance_dic = abundance.zero_inflated_lognormal(record_list)
+            abundance_dic = abundance_dispatch[args.abundance](record_list)
+            abundance.to_file(abundance_dic, args.output)
         else:
             logger.error('Could not get abundance')
             sys.exit(1)
@@ -148,8 +150,12 @@ def generate_reads(args):
             logger.error('iss generate interrupted: %s' % e)
             generator.cleanup(temp_file_list)
         else:
-            generator.concatenate(temp_file_list, args.output)
-            generator.cleanup(temp_file_list)
+            # remove the duplicates in file list and cleanup
+            # we remove the duplicates in case two records had the same header
+            # and reads were appended to the same temp file.
+            temp_file_unique = list(set(temp_file_list))
+            generator.concatenate(temp_file_unique, args.output)
+            generator.cleanup(temp_file_unique)
             logger.info('Read generation complete')
 
 
@@ -276,7 +282,7 @@ def main():
         '--n_reads',
         '-n',
         metavar='<int>',
-        default=1000000,
+        default='1000000',
         help='Number of reads to generate (default: %(default)s). Allows \
         suffixes k, K, m, M, g and G (ex 0.5M for 500000).'
     )

diff --git a/iss/generator.py b/iss/generator.py
@@ -194,5 +194,10 @@ def cleanup(file_list):
     logger.info('Cleaning up')
     for temp_file in file_list:
         if temp_file is not None:
-            os.remove(temp_file + '_R1.fastq')
-            os.remove(temp_file + '_R2.fastq')
+            try:
+                os.remove(temp_file + '_R1.fastq')
+                os.remove(temp_file + '_R2.fastq')
+            except FileNotFoundError as e:
+                logger.error('Temporary file not found: %s' % temp_file)
+                logger.error('You may have to remove temporary files manually')
+                sys.exit(1)
diff --git a/iss/util.py b/iss/util.py
@@ -6,6 +6,7 @@
 
 from Bio import SeqIO
 
+import os
 import sys
 import logging
 import numpy as np
@@ -132,3 +133,14 @@ def convert_n_reads(unit):
         logger.error('%s is not a valid number of reads' % unit)
         sys.exit(1)
     return unit_int
+
+
+def genome_file_exists(filename):
+    """Log an error and exit with status 1 if `filename` already exists."""
+    logger = logging.getLogger(__name__)
+    # explicit check: an `assert` would be stripped under `python -O`
+    if os.path.exists(filename):
+        logger.error('%s already exists. Aborting.' % filename)
+        logger.error('Maybe use --genomes %s' % filename)
+        logger.error('or use --ncbi with another output prefix')
+        sys.exit(1)
diff --git a/iss/version.py b/iss/version.py
@@ -1 +1 @@
-__version__ = '0.8.0'
+__version__ = '0.8.1'
diff --git a/setup.py b/setup.py
@@ -5,12 +5,12 @@
 
 setup(
     name='InSilicoSeq',
-    version='0.8.0',
+    version='0.8.1',
 
     description='a sequencing simulator',
 
     url='https://github.com/HadrienG/InSilicoSeq',
-    download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.0',
+    download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.1',
     author='Hadrien Gourlé',
     author_email='[email protected]',