Mercurial > repos > greg > gene_family_aligner
changeset 43:fc829bac606b draft
Uploaded
author | greg |
---|---|
date | Wed, 06 Sep 2017 09:18:38 -0400 |
parents | 7cb6a832af82 |
children | d02010937c05 |
files | .shed.yml gene_family_aligner.py gene_family_aligner.xml |
diffstat | 3 files changed, 24 insertions(+), 118 deletions(-) [+] |
line wrap: on
line diff
--- a/.shed.yml Fri Aug 25 13:00:09 2017 -0400 +++ b/.shed.yml Wed Sep 06 09:18:38 2017 -0400 @@ -7,7 +7,7 @@ Contains a tool that tool is one of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool aligns gene family sequences. -remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_aligner +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_aligner type: unrestricted categories: - Phylogenetics
--- a/gene_family_aligner.py Fri Aug 25 13:00:09 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -#!/usr/bin/env python -import argparse -import os - -import utils - -OUTPUT_DIR = 'geneFamilyAlignments_dir' - -parser = argparse.ArgumentParser() -parser.add_argument('--alignment_method', dest='alignment_method', help='Multiple sequence alignments method') -parser.add_argument('--automated_trimming', dest='automated_trimming', default=None, help='Trims alignments using trimAls ML heuristic trimming approach') -parser.add_argument('--codon_alignments', dest='codon_alignments', default=None, help='Flag for constructing orthogroup multiple codon alignments') -parser.add_argument('--gap_trimming', dest='gap_trimming', default=0, type=float, help='Remove sites in alignments with gaps of') -parser.add_argument('--iterative_realignment', dest='iterative_realignment', type=int, default=0, help='Maximum number of iterations') -parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution') -parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets') -parser.add_argument('--output', dest='output', help='Output dataset') -parser.add_argument('--output_dir', dest='output_dir', help='Output dataset files_path directory') -parser.add_argument('--output_dataset_collection', dest='output_dataset_collection', default=None, help='Output additional dataset collection of files.') -parser.add_argument('--pasta_iter_limit', dest='pasta_iter_limit', type=int, default=None, help='Maximum number of iteration that the PASTA algorithm will execute') -parser.add_argument('--pasta_script_path', dest='pasta_script_path', default=None, help='Path to script for executing pasta') -parser.add_argument('--remove_sequences', dest='remove_sequences', default=0, type=float, help='Remove sequences with gaps of') - -args = parser.parse_args() - -# Build the command line. -cmd = 'GeneFamilyAligner' -cmd += ' --orthogroup_faa %s' % args.orthogroup_faa -cmd += ' --alignment_method %s' % args.alignment_method -if args.alignment_method == 'pasta': - if args.pasta_script_path is not None: - cmd += ' --pasta_script_path %s' % args.pasta_script_path - if args.pasta_iter_limit is not None: - cmd += ' --pasta_iter_limit %d' % args.pasta_iter_limit -cmd += ' --num_threads %d' % args.num_threads -if args.codon_alignments is not None: - cmd += ' --codon_alignments' -if args.automated_trimming is not None: - cmd += ' --automated_trimming' -if args.gap_trimming > 0: - cmd += ' --gap_trimming %4f' % args.gap_trimming -if args.remove_sequences > 0: - cmd += ' --remove_sequences %4f' % args.remove_sequences -if args.iterative_realignment > 0: - cmd += ' --iterative_realignment %d' % args.iterative_realignment - -# Run the command. -utils.run_command(cmd) - -# Handle outputs. -if args.codon_alignments is None: - src_output_dir = OUTPUT_DIR -else: - src_output_dir = os.path.join(OUTPUT_DIR, 'orthogroups_aln') -if args.output_dataset_collection is not None: - utils.move_directory_files(src_output_dir, args.output_dataset_collection, copy=True) -utils.move_directory_files(src_output_dir, args.output_dir) -utils.write_html_output(args.output, 'Aligned gene family sequences', args.output_dir)
--- a/gene_family_aligner.xml Fri Aug 25 13:00:09 2017 -0400 +++ b/gene_family_aligner.xml Wed Sep 06 09:18:38 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2"> +<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0"> <description>aligns integrated orthologous gene family clusters</description> <macros> <import>macros.xml</import> @@ -8,15 +8,7 @@ #set input_format = $input_format_cond.input_format #set alignment_method_cond = $input_format_cond.alignment_method_cond #set alignment_method = $alignment_method_cond.alignment_method -#if str($input_format_cond.input_format) == 'ptortho': - #set output_codon_alignments = False -#else if str($input_format_cond.input_format) == 'ptorthocs' and str($input_format_cond.codon_alignments ) == 'no': - #set output_codon_alignments = False -#else: - #set output_codon_alignments = True -#end if - -python '$__tool_directory__/gene_family_aligner.py' +GeneFamilyAligner --alignment_method $alignment_method #if str($alignment_method) == 'pasta': --pasta_script_path '$__tool_directory__/run_pasta.py' @@ -29,7 +21,7 @@ ## str($input_format) == 'ptorthocs' --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path' #if str($input_format_cond.codon_alignments) == 'yes': - --codon_alignments true + --codon_alignments #end if #end if #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences @@ -40,7 +32,7 @@ --gap_trimming $trim_type_cond.gap_trimming #else: ## str($trim_type) == 'automated_trimming' - --automated_trimming true + --automated_trimming #end if #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps @@ -51,33 +43,7 @@ #if str($remove_sequences_with_gaps_cond.iterative_realignment): --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment #end if - #if $output_codon_alignments: - --output '$output_aln_filtered_ca' - --output_dir '$output_aln_filtered_ca.files_path' - #else: - --output '$output_aln_filtered' - --output_dir '$output_aln_filtered.files_path' - #end if - #else: - #if $output_codon_alignments: - --output '$output_aln_trimmed_ca' - --output_dir '$output_aln_trimmed_ca.files_path' - #else: - --output '$output_aln_trimmed' - --output_dir '$output_aln_trimmed.files_path' - #end if #end if -#else: - #if $output_codon_alignments: - --output '$output_aln_ca' - --output_dir '$output_aln_ca.files_path' - #else: - --output '$output_aln' - --output_dir '$output_aln.files_path' - #end if -#end if -#if str($output_dataset_collection) == 'yes': - --output_dataset_collection dataset_collection #end if ]]></command> <inputs> @@ -107,27 +73,25 @@ </param> </inputs> <outputs> - <data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> - </data> - <data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> - </data> - <data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> - </data> - <data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> - </data> - <data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> - </data> - <data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> - </data> - <collection name="dataset_collection" type="list" label="${tool.name} (dataset collection) on ${on_string}"> - <discover_datasets pattern="__name__" directory="dataset_collection" format="fasta" /> - <filter>output_dataset_collection == 'yes'</filter> + <collection name="primary" type="list" label="${tool.name} (primary proteins orthogroup alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> + </collection> + <collection name="trimmed" type="list" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter> + </collection> + <collection name="primary_trimmed" type="list" label="${tool.name} (primary proteins orthogroup alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="other_orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter> + </collection> + <collection name="filtered" type="list" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter> + </collection> + <collection name="primary_filtered" type="list" label="${tool.name} (primary and trimmed proteins orthogroup alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="other_orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter> </collection> </outputs> <tests>