diff --git a/bin/traitar_from_archive b/bin/traitar_from_archive
index eda8223..d6138b0 100755
--- a/bin/traitar_from_archive
+++ b/bin/traitar_from_archive
@@ -6,7 +6,7 @@ if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser("Traitar wrapper")
-    parser.add_argument("input_archive", help='directory with the input data')
-    parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip"])
+    parser.add_argument("input_archive", help='archive with the input data; a comma-separated list of input files if archive_type is directory')
+    parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip", "directory"])
     parser.add_argument("mode", help='either from_genes if gene prediction amino acid fasta is available in input_dir otherwise from_nucleotides in this case Prodigal is used to determine the ORFs from the nucleotide fasta files in input_dir', choices=["from_genes", "from_nucleotides", "from_annotation_summary"])
     parser.add_argument("out_archive", help='compressed traitar output foldder')
     parser.add_argument("-c", "--cpus", help='number of cpus used for the individual steps; maximum is number of samples; needs parallel', default = 1)
@@ -14,7 +14,21 @@ if __name__ == "__main__":
     parser.add_argument("--input_dir", help='directory for the traitar input; will be created if it doesn\'t exist yet', default='traitar_in')
     parser.add_argument("--output_dir", help='directory for the traitar output; will be created if it doesn\'t exist yet', default='traitar_out')
     parser.add_argument("--heatmap_format", choices = ["png", "pdf", "svg", "jpg"], default='pdf', help = "choose file format for the heatmap")
+
+    parser.add_argument("--gene_gff_type", default=None)
+    parser.add_argument("--primary_models", default=None)
+    parser.add_argument("--secondary_models", default=None)
+    parser.add_argument("--primary_hmm_db", default=None)
+    parser.add_argument("--secondary_hmm_db", default=None)
+    parser.add_argument("--annotation_summary", default=None)
+    parser.add_argument("--output_image", default=None, help='copy the combined heatmap to this file; a relative path is placed next to out_archive')
+    parser.add_argument("--generate_galaxy_html", default=None, help='<html_file>:<html_dir>; write the HTML page to html_file and the heatmap, logo and output archive to html_dir instead of writing out_archive')
+    parser.add_argument("--input_names", default=None, help='comma-separated sample names, one per input file; needed if archive_type is directory')
+
     args = parser.parse_args()
+    #read_archive splits input_names if archive_type is directory; fail with a usage message rather than an AttributeError
+    if args.archive_type == "directory" and args.input_names is None:
+        parser.error("--input_names is required if archive_type is directory")
-    read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir)
+    read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir, args.input_names)
     call_traitar(args)
diff --git a/traitar/hmmer2filtered_best.py b/traitar/hmmer2filtered_best.py
index 5e2c6f0..a1fe4a7 100644
--- a/traitar/hmmer2filtered_best.py
+++ b/traitar/hmmer2filtered_best.py
@@ -49,7 +49,7 @@ def aggregate_domain_hits(filtered_df, out_f):
#sort by gene identifier and Pfam
with open(out_f, 'w') as out_fo:
ps.DataFrame(filtered_df.columns).T.to_csv(out_f, sep = "\t", index = False, header = False, mode = 'a')
- filtered_df.sort_values(by = ["target name", "query name"], inplace = True)
+ filtered_df.sort_values(by = ["target name", "query name"], inplace = True) # index
if filtered_df.shape[0] > 0:
current_max = filtered_df.iloc[0,]
else:
diff --git a/traitar/html/sample.html b/traitar/html/sample.html
new file mode 100644
index 0000000..12cffde
--- /dev/null
+++ b/traitar/html/sample.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+An archive containing the complete output can be downloaded here.
+
+To submit another Traitar job, use the links on the left-hand pane of this window.
+
+
+
+
diff --git a/traitar/html/traitar.png b/traitar/html/traitar.png
new file mode 100644
index 0000000..7fb69de
Binary files /dev/null and b/traitar/html/traitar.png differ
diff --git a/traitar/traitar.py b/traitar/traitar.py
index 863770e..d574ec7 100644
--- a/traitar/traitar.py
+++ b/traitar/traitar.py
@@ -243,7 +243,7 @@ def execute_commands(self, commands, joblog = None):
if self.cpu > 1:
#run with parallel
#ps.DataFrame(commands).to_csv(tf, index = False, header = False)
- p = Popen("parallel --will-cite %s -j %s" % ("--joblog %s" % joblog if joblog is not None else "", self.cpu), stdout = devnull, shell = True, executable = "/bin/bash", stdin = PIPE, env = env)
+ p = Popen("parallel --will-cite %s -j %s" % ("--joblog %s" % joblog if joblog is not None else "", self.cpu), stdout = devnull, shell = True, executable = "/bin/bash", stdin = PIPE, env = env)
p.communicate(input = "\n".join(commands))
if p.returncode != 0:
if not joblog is None:
diff --git a/traitar/traitar_from_archive.py b/traitar/traitar_from_archive.py
index adffe39..1b78bdf 100644
--- a/traitar/traitar_from_archive.py
+++ b/traitar/traitar_from_archive.py
@@ -3,7 +3,10 @@
import pandas as pd
import re
import os
+import os.path
from .traitar import phenolyze
+from shutil import copyfile
+
def get_sample_names(namelist):
"""parse sample names"""
@@ -26,31 +29,46 @@ def get_sample_names(namelist):
-def read_archive(input_archive, archive_type, mode, sample2cat, input_dir):
-    """read archive"""
+def read_archive(input_archive, archive_type, mode, sample2cat, input_dir, input_names = None):
+    """extract the samples of a zip / tar.gz archive into input_dir or, if archive_type is directory, symlink the comma-separated input files there (named by the comma-separated input_names); then create the sample table"""
     if not os.path.exists(input_dir):
         os.mkdir(input_dir)
-    if archive_type == "zip":
-        archive = zipfile.open(input_archive)
-        namelist = archive.namelist()
-    if archive_type == "tar.gz":
-        archive = tarfile.open(input_archive, "r:gz")
-        namelist = archive.getnames()
-    sample_file_names, sample_names = get_sample_names(namelist)
-    for tf, sfn in zip(namelist, sample_file_names):
-        extracted = archive.extractfile(tf)
-        with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out:
-            for line in extracted:
-                sample_file_out.write(line)
-        extracted.close()
-
+
+    if archive_type == "zip" or archive_type == "tar.gz":
+        if archive_type == "zip":
+            archive = zipfile.ZipFile(input_archive) #the zipfile module has no open function
+            namelist = archive.namelist()
+        if archive_type == "tar.gz":
+            archive = tarfile.open(input_archive, "r")
+            namelist = archive.getnames()
+        sample_file_names, sample_names = get_sample_names(namelist)
+        for tf, sfn in zip(namelist, sample_file_names):
+            extracted = archive.open(tf) if archive_type == "zip" else archive.extractfile(tf) #ZipFile has no extractfile
+            with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out:
+                for line in extracted:
+                    sample_file_out.write(line)
+            extracted.close()
+    elif archive_type == "directory":
+        sample_names = input_names.split(',')
+        sample_file_names = []
+        for input_part in input_archive.split(','):
+            input_dir_part=os.path.basename(input_part)
+            sample_file_names.append(input_dir_part)
+            os.symlink(os.path.abspath(input_part), input_dir+"/"+input_dir_part) #a relative target would be resolved against input_dir
+
     #create sample table
     if sample2cat is not None:
-        sample_cat = pd.read_cvs(sample2cat, index_col = 0, sep = "\t")
+        sample_cat = pd.read_csv(sample2cat, index_col = 0, sep = "\t")
         #replace index with cleaned file names
-        sample_cat.index.rename(str, dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)]))
-        sample_table = pd.DataFrame([sample_file_names, sample_cat.loc[sample_file_names,]])
+        if archive_type != "directory":
+            sample_cat = sample_cat.rename(index = dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)]))
+            sample_table = pd.DataFrame(sample_file_names)
+            categories = pd.Series(sample_cat.loc[sample_file_names, ]['category'].tolist())
+        else:
+            sample_table = pd.DataFrame(sample_file_names)
+            categories = pd.Series(sample_cat.loc[sample_names, ]['category'].tolist())
+        sample_table['category'] = categories
         sample_table.columns = ["sample_file_name", "category"]
     else:
         sample_table = pd.DataFrame(sample_file_names)
@@ -66,5 +84,33 @@ def call_traitar(args):
     args.sample2file = "%s/sample_table.txt" % args.input_dir
     phenolyze(args)
     #compress output
-    with tarfile.open(args.out_archive, "w:gz") as tar:
-        tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+
+    if args.generate_galaxy_html is not None:
+        #expected as <html_file>:<html_dir>; split at the first colon only
+        (html_file, html_dir) = args.generate_galaxy_html.split(':', 1)
+        #os.makedirs raises if the directory already exists
+        if not os.path.isdir(html_dir):
+            os.makedirs(html_dir)
+        #page template and logo are located in the html folder next to this module
+        resource_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "html")
+        image_name = args.output_dir+"/phenotype_prediction/heatmap_combined.%s" % args.heatmap_format
+        target_image_name = html_dir+"/heatmap_combined.%s" % args.heatmap_format
+        copyfile(image_name, target_image_name)
+        with tarfile.open(html_dir+"/archive.tar.gz", "w:gz") as tar:
+            tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+        copyfile(os.path.join(resource_dir, "sample.html"), html_file)
+        logo_file = html_dir+"/traitar.png"
+        copyfile(os.path.join(resource_dir, "traitar.png"), logo_file)
+    else:
+        with tarfile.open(args.out_archive, "w:gz") as tar:
+            tar.add(args.output_dir, arcname=os.path.basename(args.output_dir))
+
+    if args.output_image is not None:
+        image_source = args.output_dir+"/phenotype_prediction/heatmap_combined.%s" % args.heatmap_format
+        if args.output_image[0:1] == '/':
+            output_image = args.output_image
+        else:
+            #os.path.join keeps the path relative if out_archive has no directory part
+            output_image = os.path.join(os.path.dirname(args.out_archive), args.output_image)
+
+        copyfile(image_source, output_image)