aweimann · foobarx · Jul 31, 2017 · Jul 31, 2017 · Jul 31, 2017 · Jul 31, 2017
diff --git a/bin/traitar_from_archive b/bin/traitar_from_archive
@@ -6,15 +6,26 @@ if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser("Traitar wrapper")
     parser.add_argument("input_archive", help='directory with the input data')
-    parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip"])
+    parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip", "directory"])
     parser.add_argument("mode", help='either from_genes if gene prediction amino acid fasta is available in input_dir otherwise from_nucleotides in this case Prodigal is used to determine the ORFs from the nucleotide fasta files in input_dir', choices=["from_genes", "from_nucleotides", "from_annotation_summary"])
     parser.add_argument("out_archive", help='compressed traitar output foldder')
     parser.add_argument("-c", "--cpus", help='number of cpus used for the individual steps; maximum is number of samples; needs parallel', default = 1)
     parser.add_argument("--sample2cat",  help='a table giving an environment for each sample')
     parser.add_argument("--input_dir", help='directory for the traitar input; will be created if it doesn\'t exist yet', default='traitar_in')
     parser.add_argument("--output_dir", help='directory for the traitar output; will be created if it doesn\'t exist yet', default='traitar_out')
     parser.add_argument("--heatmap_format", choices = ["png", "pdf", "svg", "jpg"], default='pdf', help = "choose file format for the heatmap") 
+
+    parser.add_argument("--gene_gff_type", default=None)
+    parser.add_argument("--primary_models", default=None)
+    parser.add_argument("--secondary_models", default=None)
+    parser.add_argument("--primary_hmm_db", default=None)
+    parser.add_argument("--secondary_hmm_db", default=None)
+    parser.add_argument("--annotation_summary", default=None)
+    parser.add_argument("--output_image", default=None, help='copy the combined heatmap to this path; a relative path is placed next to out_archive; ignored when --generate_galaxy_html is given')
+    parser.add_argument("--generate_galaxy_html", default=None, help='<html_file>:<html_dir>; write an HTML result page to html_file and the heatmap, logo and compressed output to html_dir instead of writing out_archive')
+    parser.add_argument("--input_names", default=None, help='comma-separated sample names; used when archive_type is directory, in which case input_archive is a comma-separated list of paths')
+
     args = parser.parse_args()
-    read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir)
+    read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir, args.input_names)
     call_traitar(args)
 
diff --git a/traitar/hmmer2filtered_best.py b/traitar/hmmer2filtered_best.py
@@ -49,7 +49,7 @@ def aggregate_domain_hits(filtered_df, out_f):
     #sort by gene identifier and Pfam
     with open(out_f, 'w') as out_fo:
         ps.DataFrame(filtered_df.columns).T.to_csv(out_f, sep = "\t", index = False, header = False, mode = 'a')
-        filtered_df.sort_values(by = ["target name", "query name"], inplace = True)
+        filtered_df.sort_values(by = ["target name", "query name"], inplace = True) # index
         if filtered_df.shape[0] > 0:
             current_max = filtered_df.iloc[0,] 
         else:

diff --git a/traitar/html/sample.html b/traitar/html/sample.html
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html>
+<body>
+<img src="traitar.png" alt="Traitar logo"/>
+<!-- NOTE: the heatmap is copied next to this page as heatmap_combined.EXT, where EXT is the chosen heatmap format; the image below only resolves when that format is png -->
+<img src="heatmap_combined.png" alt="Combined heatmap" width="100%"/>
+
+An archive containing the complete output can be downloaded <a href="archive.tar.gz">here</a>.
+<br>
+To submit another Traitar job, use the links on the left-hand pane of this window.
+<br>
+<img src="/static/images/traitar/Screenshot_12_fix.png" alt="Screenshot"/>
+</body>
+</html>
diff --git a/traitar/html/traitar.png b/traitar/html/traitar.png
diff --git a/traitar/traitar.py b/traitar/traitar.py
@@ -243,7 +243,7 @@ def execute_commands(self, commands, joblog = None):
         if self.cpu > 1:
             #run with parallel
             #ps.DataFrame(commands).to_csv(tf, index = False, header = False) 
-            p = Popen("parallel --will-cite %s -j %s" %  ("--joblog %s" % joblog if joblog is not None else "", self.cpu),  stdout = devnull, shell = True,  executable = "/bin/bash", stdin = PIPE, env = env)
+            p = Popen("parallel --will-cite %s -j %s" %  ("--joblog %s" % joblog if joblog is not None else "", self.cpu),  stdout = devnull, shell = True,  executable = "/bin/bash", stdin = PIPE, env = env)       
             p.communicate(input = "\n".join(commands))
             if p.returncode != 0:
                 if not joblog is None:

diff --git a/traitar/traitar_from_archive.py b/traitar/traitar_from_archive.py
@@ -3,7 +3,10 @@
 import pandas as pd
 import re
 import os
+import os.path
 from .traitar import phenolyze
+from shutil import copyfile
+
 
 def get_sample_names(namelist):
     """parse sample names"""
@@ -26,31 +29,46 @@ def get_sample_names(namelist):
 
 
 
-def read_archive(input_archive, archive_type, mode, sample2cat, input_dir):
+def read_archive(input_archive, archive_type, mode, sample2cat, input_dir, input_names):
     """read archive"""
     if not os.path.exists(input_dir):
         os.mkdir(input_dir)
-    if archive_type == "zip":
-        archive = zipfile.open(input_archive)
-        namelist = archive.namelist()
-    if archive_type == "tar.gz":
-        archive = tarfile.open(input_archive, "r:gz")
-        namelist = archive.getnames()
-    sample_file_names, sample_names = get_sample_names(namelist)
-    for tf, sfn in zip(namelist, sample_file_names):
-            extracted = archive.extractfile(tf) 
-            with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out:
-                for line in extracted:
-                    sample_file_out.write(line) 
-            extracted.close()
-
+
+    if archive_type == "zip" or archive_type == "tar.gz":
+        if archive_type == "zip":
+            archive = zipfile.ZipFile(input_archive)  # the zipfile module has no open(); archives are read via ZipFile
+            namelist = archive.namelist()
+        if archive_type == "tar.gz":
+            archive = tarfile.open(input_archive, "r")
+            namelist = archive.getnames()
+        sample_file_names, sample_names = get_sample_names(namelist)
+        for tf, sfn in zip(namelist, sample_file_names):
+            # ZipFile exposes members through open(), TarFile through extractfile()
+            extracted = archive.open(tf) if archive_type == "zip" else archive.extractfile(tf)
+            with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out:
+                for line in extracted:
+                    sample_file_out.write(line)
+            extracted.close()
+    elif archive_type == "directory":
+        sample_names = input_names.split(',')
+        sample_file_names = []
+        for input_part in input_archive.split(','):
+            input_dir_part=os.path.basename(input_part)
+            sample_file_names.append(input_dir_part)
+            os.symlink(input_part, input_dir+"/"+input_dir_part)
 
     #create sample table
     if sample2cat is not None:
-        sample_cat = pd.read_cvs(sample2cat, index_col = 0, sep = "\t")
+        sample_cat = pd.read_csv(sample2cat, index_col = 0, sep = "\t")
         #replace index with cleaned file names
-        sample_cat.index.rename(str, dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)]))
-        sample_table = pd.DataFrame([sample_file_names, sample_cat.loc[sample_file_names,]])
+        if archive_type != "directory":  # relabel rows from archive member names to the cleaned file names
+            sample_cat = sample_cat.rename(index=dict(zip(namelist, sample_file_names)))
+            sample_table = pd.DataFrame(sample_names)
+            categories = pd.Series(sample_cat.loc[sample_file_names, ]['category'].tolist())
+        else:
+            sample_table = pd.DataFrame(sample_file_names)
+            categories = pd.Series(sample_cat.loc[sample_names, ]['category'].tolist())
+        sample_table['category'] = categories
         sample_table.columns = ["sample_file_name", "category"]
     else:
         sample_table = pd.DataFrame(sample_file_names)
@@ -66,5 +84,32 @@ def call_traitar(args):
     args.sample2file = "%s/sample_table.txt" % args.input_dir 
     phenolyze(args)
     #compress output
-    with tarfile.open(args.out_archive, "w:gz") as tar:
-        tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+
+    # path of the combined heatmap inside the traitar output directory
+    heatmap_name = "heatmap_combined.%s" % args.heatmap_format
+    heatmap_source = os.path.join(args.output_dir, "phenotype_prediction", heatmap_name)
+
+    if args.generate_galaxy_html is not None:
+        # the option has the form <html_file>:<html_dir>
+        (html_file, html_dir) = args.generate_galaxy_html.split(':')
+        # tolerate an already existing directory instead of failing in os.makedirs
+        if not os.path.isdir(html_dir):
+            os.makedirs(html_dir)
+        copyfile(heatmap_source, os.path.join(html_dir, heatmap_name))
+        with tarfile.open(os.path.join(html_dir, "archive.tar.gz"), "w:gz") as tar:
+            tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+        # locate the bundled page and logo next to this module rather than
+        # under a hard-coded /home/traitar checkout
+        html_assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "html")
+        copyfile(os.path.join(html_assets, "sample.html"), html_file)
+        copyfile(os.path.join(html_assets, "traitar.png"), os.path.join(html_dir, "traitar.png"))
+    else:
+        with tarfile.open(args.out_archive, "w:gz") as tar:
+            tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+
+        if args.output_image is not None:
+            # a relative image path is placed next to the output archive;
+            # os.path.join returns an absolute second argument unchanged and
+            # does not yield "/<name>" when the archive has no directory part
+            output_image = os.path.join(os.path.dirname(args.out_archive), args.output_image)
+            copyfile(heatmap_source, output_image)