Mercurial > repos > greg > gene_family_classifier
view gene_family_classifier.xml @ 33:c1a3b77de0f0 draft
Uploaded
author | greg |
---|---|
date | Wed, 01 Feb 2017 14:50:48 -0500 |
parents | 364960a5ad2c |
children | 9a60634252fd |
line wrap: on
line source
<tool id="plant_tribes_gene_family_classifier" name="Classify gene sequences" version="0.3">
    <description>into precomputed orthologous gene family clusters</description>
    <requirements>
        <requirement type="package" version="0.3">plant_tribes_gene_family_classifier</requirement>
    </requirements>
    <stdio>
        <!-- Anything other than zero is an error -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <!-- In case the return code has not been set properly check stderr too -->
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <command>
        <![CDATA[
            ## Step 1: derive two template flags from the nested "advanced" conditionals.
            ##   create_ortho_sequences                - orthogroup fasta files were requested
            ##   create_corresponding_coding_sequences - a CDS fasta file was supplied as well
            ## Both flags are False unless the advanced options request orthogroup fasta files.
            #if str($options_type.options_type_selector) == 'advanced':
                #set create_orthogroup_cond = $options_type.create_orthogroup_cond
                #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
                #if str($create_orthogroup) == 'yes':
                    #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
                    #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
                        #set create_corresponding_coding_sequences = True
                    #else:
                        #set create_corresponding_coding_sequences = False
                    #end if
                    #import os
                    #set create_ortho_sequences = True
                    #set orthogroups_fasta_src_dir = $os.path.join('geneFamilyClassification_dir', 'orthogroups_fasta')
                    ## The orthogroup fasta files are moved into the extra files directory of the output dataset.
                    #set dest_dir = $output.extra_files_path
                    mkdir -p $dest_dir &&
                #else:
                    #set create_ortho_sequences = False
                    #set create_corresponding_coding_sequences = False
                #end if
            #else:
                #set create_ortho_sequences = False
                #set create_corresponding_coding_sequences = False
            #end if

            ## Step 2: run the classifier.
            GeneFamilyClassifier
            --proteins "$input"
            --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
            --scaffold "$scaffold"
            --method $method
            --classifier $save_hmmscan_log_cond.classifier
            --num_threads \${GALAXY_SLOTS:-4}
            #if str($options_type.options_type_selector) == 'advanced':
                --super_orthogroups $options_type.super_orthogroups
                ## Compare the selector parameter of the conditional (single_copy), not the
                ## conditional itself; otherwise the two taxa options are never passed.
                #if str($options_type.single_copy_cond.single_copy) == 'taxa':
                    --single_copy_taxa $options_type.single_copy_cond.single_copy_taxa
                    --taxa_present $options_type.single_copy_cond.taxa_present
                #end if
                #if str($create_orthogroup) == 'yes':
                    --orthogroup_fasta
                    #if $create_corresponding_coding_sequences:
                        --coding_sequences "$create_corresponding_coding_sequences_cond.coding_sequences"
                    #end if
                #end if
            #end if

            ## Step 3: keep or discard the hmmscan log so that it is not picked up
            ## by the dataset discovery configured on the "orthos" collection.
            #if str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both':
                #if str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
                    && mv geneFamilyClassification_dir/hmmscan.log $hmmscan_log
                #else:
                    && rm geneFamilyClassification_dir/hmmscan.log
                #end if
            #end if

            ## Step 4: write a summary header plus a file listing into the primary output
            ## and move the orthogroup fasta files into its extra files directory.
            #if $create_ortho_sequences:
                #if $create_corresponding_coding_sequences:
                    && echo "# Precomputed orthologous gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                #else:
                    && echo "# Precomputed orthologous gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                #end if
                && ls -al $orthogroups_fasta_src_dir | grep f >> $output
                && mv $orthogroups_fasta_src_dir/* $dest_dir
                ## NOTE(review): "|| true" applies to the whole "&&" chain above, so in this
                ## branch a non-zero exit status of any earlier command is masked as well and
                ## only the stderr regexes in stdio can flag a failure. Confirm this is intended.
                || true
            #end if
        ]]>
    </command>
    <inputs>
        <param name="input" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file"/>
        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
            <options from_data_table="plant_tribes_scaffolds" />
            <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
        </param>
        <param name="method" type="select" label="Protein clustering method">
            <option value="gfam" selected="true">GFam</option>
            <option value="orthofinder">OrthoFinder</option>
            <option value="orthomcl">OrthoMCL</option>
        </param>
        <!-- The hmmscan log can only be saved when hmmscan is part of the classification -->
        <conditional name="save_hmmscan_log_cond">
            <param name="classifier" type="select" label="Protein classification method">
                <option value="blastp" selected="true">blastp</option>
                <option value="hmmscan">HMMScan</option>
                <option value="both">Both blastp and HMMScan</option>
            </param>
            <when value="blastp" />
            <when value="hmmscan">
                <param name="save_hmmscan_log" type="select" label="Save hmmscan log?" help="Save the hmmscan log in an additional output dataset">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
            </when>
            <when value="both">
                <param name="save_hmmscan_log" type="select" label="Save hmmscan log?" help="Save the hmmscan log in an additional output dataset">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
            </when>
        </conditional>
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="super_orthogroups" type="select" label="Super Orthogroups" help="Secondary MCL clusters of orthogroups">
                    <option value="min_evalue" selected="true">Minimum e-value</option>
                    <option value="avg_evalue">Average e-value</option>
                </param>
                <conditional name="single_copy_cond">
                    <param name="single_copy" type="select" label="Select single copy configuration">
                        <option value="custom" selected="true">Single copy orthogroup custom</option>
                        <option value="taxa">Minimum single copy taxa required in orthogroup</option>
                    </param>
                    <when value="custom" />
                    <when value="taxa">
                        <param name="single_copy_taxa" type="integer" value="20" label="Minimum single copy taxa required in orthogroup"/>
                        <param name="taxa_present" type="integer" value="21" label="Minimum taxa required in single copy orthogroup"/>
                    </when>
                </conditional>
                <conditional name="create_orthogroup_cond">
                    <param name="create_orthogroup" type="select" label="Create orthogroup fasta files?">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <conditional name="create_corresponding_coding_sequences_cond">
                            <param name="create_corresponding_coding_sequences" type="select" label="Create corresponding coding sequences?">
                                <option value="no" selected="true">No</option>
                                <option value="yes">Yes</option>
                            </param>
                            <when value="no" />
                            <when value="yes">
                                <param name="coding_sequences" format="fasta" type="data" label="Corresponding coding sequences (CDS) fasta file"/>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}">
            <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
        </data>
        <!-- Output names must be unique and the command only writes to "output", so a single
             dataset is declared here: it is created whenever orthogroup fasta files are requested
             and its format switches from pgfc to pgfccs when coding sequences are supplied. -->
        <data name="output" format="pgfc" label="Gene family clusters on ${on_string}">
            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter>
            <change_format>
                <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="yes" format="pgfccs" />
            </change_format>
        </data>
        <collection name="orthos" type="list">
            <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
        </collection>
    </outputs>
    <tests>
        <test>
            <!-- Not sure how to test this since the tool requires scaffolds data which is extremely large and installed using a Data Manager -->
            <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
            <param name="scaffold" value="22Gv1.1"/>
            <param name="method" value="orthomcl"/>
            <param name="classifier" value="blastp"/>
            <output_collection name="orthos" type="list">
                <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
                <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
                <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
            </output_collection>
        </test>
    </tests>
    <help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool classifies gene
sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive to
remote homologs) or both (more exhaustive).

This tool accepts any of the following as input:

* the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool
* externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly
* gene predictions from a sequenced genome

-----

**Options**

* **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
* **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters).
* **Protein classification method** - blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
* **Super Orthogroups** - Secondary MCL clusters of orthogroups.
* **Minimum single copy taxa required in orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
* **Minimum taxa required in single copy orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
* **Corresponding coding sequences (CDS) fasta file** - Used only when selecting "Create orthogroup fasta files?".
    </help>
    <citations>
        <citation type="bibtex">
            @unpublished{None,
            author = {Eric Wafula},
            title = {None},
            year = {None},
            eprint = {None},
            url = {None}
            }</citation>
    </citations>
</tool>