diff --git a/bin/traitar_from_archive b/bin/traitar_from_archive index eda8223..d6138b0 100755 --- a/bin/traitar_from_archive +++ b/bin/traitar_from_archive @@ -6,7 +6,7 @@ if __name__ == "__main__": import argparse parser = argparse.ArgumentParser("Traitar wrapper") parser.add_argument("input_archive", help='directory with the input data') - parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip"]) + parser.add_argument("archive_type", help='specify kind of archive', choices = ["tar.gz", "zip", "directory"]) parser.add_argument("mode", help='either from_genes if gene prediction amino acid fasta is available in input_dir otherwise from_nucleotides in this case Prodigal is used to determine the ORFs from the nucleotide fasta files in input_dir', choices=["from_genes", "from_nucleotides", "from_annotation_summary"]) parser.add_argument("out_archive", help='compressed traitar output foldder') parser.add_argument("-c", "--cpus", help='number of cpus used for the individual steps; maximum is number of samples; needs parallel', default = 1) @@ -14,7 +14,18 @@ if __name__ == "__main__": parser.add_argument("--input_dir", help='directory for the traitar input; will be created if it doesn\'t exist yet', default='traitar_in') parser.add_argument("--output_dir", help='directory for the traitar output; will be created if it doesn\'t exist yet', default='traitar_out') parser.add_argument("--heatmap_format", choices = ["png", "pdf", "svg", "jpg"], default='pdf', help = "choose file format for the heatmap") + + parser.add_argument("--gene_gff_type", default=None) + parser.add_argument("--primary_models", default=None) + parser.add_argument("--secondary_models", default=None) + parser.add_argument("--primary_hmm_db", default=None) + parser.add_argument("--secondary_hmm_db", default=None) + parser.add_argument("--annotation_summary", default=None) + parser.add_argument("--output_image", default=None) + parser.add_argument("--generate_galaxy_html", default=None) + parser.add_argument("--input_names", default=None) + args = parser.parse_args() - read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir) + read_archive(args.input_archive, args.archive_type, args.mode, args.sample2cat, args.input_dir, args.input_names) call_traitar(args) diff --git a/traitar/hmmer2filtered_best.py b/traitar/hmmer2filtered_best.py index 5e2c6f0..a1fe4a7 100644 --- a/traitar/hmmer2filtered_best.py +++ b/traitar/hmmer2filtered_best.py @@ -49,7 +49,7 @@ def aggregate_domain_hits(filtered_df, out_f): #sort by gene identifier and Pfam with open(out_f, 'w') as out_fo: ps.DataFrame(filtered_df.columns).T.to_csv(out_f, sep = "\t", index = False, header = False, mode = 'a') - filtered_df.sort_values(by = ["target name", "query name"], inplace = True) + filtered_df.sort_values(by = ["target name", "query name"], inplace = True) # index if filtered_df.shape[0] > 0: current_max = filtered_df.iloc[0,] else: diff --git a/traitar/html/sample.html b/traitar/html/sample.html new file mode 100644 index 0000000..12cffde --- /dev/null +++ b/traitar/html/sample.html @@ -0,0 +1,14 @@ + + + + + + + +An archive containing the complete output can be downloaded here. +
+To submit another Traitar job, use the links on the left-hand pane of this window. +
+ + + diff --git a/traitar/html/traitar.png b/traitar/html/traitar.png new file mode 100644 index 0000000..7fb69de Binary files /dev/null and b/traitar/html/traitar.png differ diff --git a/traitar/traitar.py b/traitar/traitar.py index 863770e..d574ec7 100644 --- a/traitar/traitar.py +++ b/traitar/traitar.py @@ -243,7 +243,7 @@ def execute_commands(self, commands, joblog = None): if self.cpu > 1: #run with parallel #ps.DataFrame(commands).to_csv(tf, index = False, header = False) - p = Popen("parallel --will-cite %s -j %s" % ("--joblog %s" % joblog if joblog is not None else "", self.cpu), stdout = devnull, shell = True, executable = "/bin/bash", stdin = PIPE, env = env) + p = Popen("parallel --will-cite %s -j %s" % ("--joblog %s" % joblog if joblog is not None else "", self.cpu), stdout = devnull, shell = True, executable = "/bin/bash", stdin = PIPE, env = env) p.communicate(input = "\n".join(commands)) if p.returncode != 0: if not joblog is None: diff --git a/traitar/traitar_from_archive.py b/traitar/traitar_from_archive.py index adffe39..1b78bdf 100644 --- a/traitar/traitar_from_archive.py +++ b/traitar/traitar_from_archive.py @@ -3,7 +3,10 @@ import pandas as pd import re import os +import os.path from .traitar import phenolyze +from shutil import copyfile + def get_sample_names(namelist): """parse sample names""" @@ -26,31 +29,46 @@ def get_sample_names(namelist): -def read_archive(input_archive, archive_type, mode, sample2cat, input_dir): +def read_archive(input_archive, archive_type, mode, sample2cat, input_dir, input_names): """read archive""" if not os.path.exists(input_dir): os.mkdir(input_dir) - if archive_type == "zip": - archive = zipfile.open(input_archive) - namelist = archive.namelist() - if archive_type == "tar.gz": - archive = tarfile.open(input_archive, "r:gz") - namelist = archive.getnames() - sample_file_names, sample_names = get_sample_names(namelist) - for tf, sfn in zip(namelist, sample_file_names): - extracted = archive.extractfile(tf) - with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out: - for line in extracted: - sample_file_out.write(line) - extracted.close() - + + if archive_type == "zip" or archive_type == "tar.gz": + if archive_type == "zip": + archive = zipfile.open(input_archive) + namelist = archive.namelist() + if archive_type == "tar.gz": + archive = tarfile.open(input_archive, "r") + namelist = archive.getnames() + sample_file_names, sample_names = get_sample_names(namelist) + for tf, sfn in zip(namelist, sample_file_names): + extracted = archive.extractfile(tf) + with open("%s/%s" % (input_dir, sfn), 'w') as sample_file_out: + for line in extracted: + sample_file_out.write(line) + extracted.close() + elif archive_type == "directory": + sample_names = input_names.split(',') + sample_file_names = [] + for input_part in input_archive.split(','): + input_dir_part=os.path.basename(input_part) + sample_file_names.append(input_dir_part) + os.symlink(input_part, input_dir+"/"+input_dir_part) + #create sample table if sample2cat is not None: - sample_cat = pd.read_cvs(sample2cat, index_col = 0, sep = "\t") + sample_cat = pd.read_csv(sample2cat, index_col = 0, sep = "\t") #replace index with cleaned file names - sample_cat.index.rename(str, dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)])) - sample_table = pd.DataFrame([sample_file_names, sample_cat.loc[sample_file_names,]]) + if archive_type != "directory": + sample_cat.index.rename(str, dict([(tf, sfn) for sfn, tf in zip(sample_file_names, namelist)])) + sample_table = pd.DataFrame(sample_names) + categories = pd.Series(sample_cat.loc[sample_file_names, ]['category'].tolist()) + else: + sample_table = pd.DataFrame(sample_file_names) + categories = pd.Series(sample_cat.loc[sample_names, ]['category'].tolist()) + sample_table['category'] = categories sample_table.columns = ["sample_file_name", "category"] else: sample_table = pd.DataFrame(sample_file_names) @@ -66,5 +84,27 @@ def call_traitar(args): args.sample2file = "%s/sample_table.txt" % args.input_dir phenolyze(args) #compress output - with tarfile.open(args.out_archive, "w:gz") as tar: - tar.add(args.output_dir, arcname=os.path.basename(args.output_dir)) + + if args.generate_galaxy_html is not None: + (html_file, html_dir) = args.generate_galaxy_html.split(':') + os.makedirs(html_dir) + image_name = args.output_dir+"/phenotype_prediction/heatmap_combined.%s" % args.heatmap_format + target_image_name = html_dir+"/heatmap_combined.%s" % args.heatmap_format + copyfile(image_name, target_image_name) + with tarfile.open(html_dir+"/archive.tar.gz", "w:gz") as tar: + tar.add(args.output_dir, arcname=os.path.basename(args.output_dir)) + copyfile('/home/traitar/traitar/traitar/html/sample.html', html_file) + logo_file = html_dir+"/traitar.png" + copyfile('/home/traitar/traitar/traitar/html/traitar.png', logo_file) + else: + with tarfile.open(args.out_archive, "w:gz") as tar: + tar.add(args.output_dir, arcname=os.path.basename(args.output_dir)) + + if args.output_image is not None: + image_source = args.output_dir+"/phenotype_prediction/heatmap_combined.%s" % args.heatmap_format + if args.output_image[0:1] == '/': + output_image = args.output_image + else: + output_image = os.path.dirname(args.out_archive)+'/'+args.output_image + + copyfile(image_source, output_image)