Mercurial > repos > greg > kaks_analysis

<tool id="plant_tribes_kaks_analysis" name="Perform orthologous or paralogous ks analyses" version="0.7.0">
    <description>of coding sequences and amino acid sequences</description>
    <requirements>
        <!-- Single package dependency; presumably the package that ships the KaKsAnalysis executable invoked by the command section (confirm against the package recipe). -->
        <requirement version="0.7" type="package">plant_tribes_kaks_analysis</requirement>
    </requirements>
    <stdio>
        <!-- Every non-zero exit status, negative or positive, marks the job as failed. -->
        <exit_code range=":-1" level="fatal" />
        <exit_code range="1:" level="fatal" />
        <!-- The exit status may not have been set properly, so stdout and stderr are also scanned for error markers. -->
        <regex match="Error:" source="both" level="fatal" />
        <regex match="Exception:" source="both" level="fatal" />
    </stdio>
    <command>
        <![CDATA[
            ## Builds the KaKsAnalysis command line and then moves the result files
            ## into the Galaxy output datasets.
            ##
            ## The mv commands below collect result files from this directory. It is
            ## not passed to KaKsAnalysis, so it is assumed to be the location the
            ## program writes to by default.
            #set output_dir = 'kaksAnalysis_dir'
            #set comparison = $comparison_cond.comparison
            ## The nested conditionals only exist when the advanced options are
            ## selected, so fall back to 'no' for both selectors in basic mode.
            #if str($options_type.options_type_selector) == 'advanced':
                #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond
                #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select
                #set fit_components_cond = $options_type.fit_components_cond
                #set fit_components = $fit_components_cond.fit_components
            #else:
                #set codeml_ctl_file_select = 'no'
                #set fit_components = 'no'
            #end if
            KaKsAnalysis
            --config_dir '$scaffold.fields.path'
            --num_threads \${GALAXY_SLOTS:-4}
            --coding_sequences_species_1 '$coding_sequences_species_1'
            --proteins_species_1 '$proteins_species_1'
            --comparison $comparison
            #if str($comparison) == 'orthologs':
                --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
                --proteins_species_2 '$comparison_cond.proteins_species_2'
            #end if
            #if str($options_type.options_type_selector) == 'advanced':
                ## These parameters are declared inside the options_type conditional
                ## and must be referenced through it.
                --min_coverage $options_type.min_coverage
                --recalibration_rate $options_type.recalibration_rate
                #if str($codeml_ctl_file_select) == 'yes':
                    --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file'
                    ## No else block needed here because the default codeml_ctl config
                    ## will be used if the --codeml_ctl_file flag is missing.
                #end if
                #if str($fit_components) == 'yes':
                    --num_of_components $fit_components_cond.num_of_components
                    --min_ks $fit_components_cond.min_ks
                    --max_ks $fit_components_cond.max_ks
                #end if
            #end if
            >/dev/null
            && mv $output_dir/species1.fna '$output_species1_fna'
            && mv $output_dir/species1.faa '$output_species1_faa'
            ## The branches below mirror the output filters: the paralogs blastn
            ## dataset only exists for a self species comparison, the species 2 and
            ## orthologs datasets only for a cross species comparison.
            #if str($comparison) == 'paralogs':
                && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
            #else:
                && mv $output_dir/species2.faa '$output_species2_faa'
                && mv $output_dir/species2.fna '$output_species2_fna'
                && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
                && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
            #end if
            && mv $output_dir/*.rbhb '$output_rbhb'
            && mv $output_dir/*.kaks '$output_kaks'
            #if str($fit_components) == 'yes':
                && mv $output_dir/*.components '$output_components'
            #end if
        ]]>
    </command>
    <inputs>
        <!-- Sequences for the first species are always required. -->
        <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the species" />
        <param name="proteins_species_1" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file for the species" />
        <!-- Inputs for a second species are only requested for a cross species (orthologs) comparison. -->
        <conditional name="comparison_cond">
            <param name="comparison" type="select" label="Select method for pairwise sequence comparison to determine homologous pairs" help="Cross species comparison requires selection of inputs for second species">
                <option value="paralogs" selected="true">Self species comparison</option>
                <option value="orthologs">Cross species comparison</option>
            </param>
            <when value="paralogs" />
            <when value="orthologs">
                <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the second species" />
                <param name="proteins_species_2" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file for the second species" />
            </when>
        </conditional>
        <!-- Scaffold choices come from the plant_tribes_scaffolds data table; the selected entry's path is passed to the command as the configuration directory. -->
        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold" help="Selection specifies default configuration files">
            <options from_data_table="plant_tribes_scaffolds" />
            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
        </param>
        <!-- Basic mode adds no extra flags to the command line; advanced mode exposes the parameters below. -->
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="Minimum sequence pairwise coverage length between homologous pairs" />
                <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species" />
                <conditional name="codeml_ctl_file_cond">
                    <param name="codeml_ctl_file_select" type="select" label="Select PAML codeml control file?" help="Used for ML analysis of protein-coding DNA sequences using codon substitution models, select No to use the default control file">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
                    </when>
                </conditional>
                <conditional name="fit_components_cond">
                    <param name="fit_components" type="select" label="Fit a mixture model of multivariate normal components to synonymous (ks) distribution?" help="Used to identify significant duplication events in a genome">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="num_of_components" type="integer" value="0" min="0" label="Number components to fit to synonymous substitutions (ks) distribution" />
                        <param name="min_ks" type="float" value="0.0" min="0.0" label="Lower limit of synonymous substitutions (ks)" help="Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome" />
                        <param name="max_ks" type="float" value="0.0" min="0.0" label="Upper limit of synonymous substitutions (ks)" help="Excludes likely ancient paralogous pairs" />
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Species 1 sequence files are produced for every run. -->
        <data name="output_species1_fna" format="fasta" label="KaKs analysis (coding sequences) on ${on_string}" />
        <data name="output_species1_faa" format="fasta" label="KaKs analysis (amino acids) on ${on_string}" />
        <!-- Species 2 sequence files only exist for a cross species comparison. -->
        <data name="output_species2_fna" format="fasta" label="KaKs analysis (coding sequences) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_species2_faa" format="fasta" label="KaKs analysis (amino acids) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <!-- blastn results: one paralogs dataset for a self species comparison, two orthologs datasets for a cross species comparison. -->
        <data name="output_species1_paralog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'paralogs'</filter>
        </data>
        <data name="output_species1_ortholog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_species2_ortholog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_rbhb" format="tabular" label="KaKs analysis (paralogous pairs) on ${on_string}" />
        <data name="output_kaks" format="tabular" label="KaKs analysis on ${on_string}" />
        <!-- Only created when the advanced options are selected and mixture model fitting is enabled. -->
        <data name="output_components" format="tabular" label="KaKs analysis (significant components in the ks distribution) on ${on_string}">
            <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Placeholder test case: it declares no inputs and no expected outputs. -->
        <test />
    </tests>
    <help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool performs orthologous
or paralogous ks analyses of coding sequences and amino acid sequences.

-----

**Options**

 * **Required options**

  - **Coding sequences (CDS) fasta file for the species** - Coding sequences (CDS) fasta file for the first species.
  - **Amino acids (proteins) sequences fasta file for the species** - Amino acids (proteins) sequences fasta file for the first species.
  - **Select method for pairwise sequence comparison to determine homologous pairs** - Either a self species comparison (paralogs) or a cross species comparison (orthologs). A cross species comparison requires selection of the coding sequences and amino acids sequences fasta files for the second species.
  - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. The selection specifies the default configuration files.

 * **Advanced options**

  - **Minimum sequence pairwise coverage length between homologous pairs** - Value between 0.3 and 1.0 (the default is 0.5).
  - **Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species** - Value of 0.0 or greater (the default is 0.0).
  - **Select PAML codeml control file?** - Used for ML analysis of protein-coding DNA sequences using codon substitution models. If 'No' is selected, the default control file is used.
  - **Fit a mixture model of multivariate normal components to synonymous (ks) distribution?** - Used to identify significant duplication events in a genome. Selecting 'Yes' adds an output containing the significant components in the ks distribution.
  - **Number components to fit to synonymous substitutions (ks) distribution** - Number of components to fit when a mixture model is fitted.
  - **Lower limit of synonymous substitutions (ks)** - Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome.
  - **Upper limit of synonymous substitutions (ks)** - Excludes likely ancient paralogous pairs.

    </help>
    <citations>
        <!-- Citations shown with the tool: one BibTeX entry pointing at the PlantTribes GitHub repository (title and year are still the placeholder value "None"), followed by three DOI citations. -->
        <!-- NOTE(review): the DOIs presumably reference the third-party software used by the analysis (blastn, PAML codeml, mixture model fitting); confirm each one resolves to the intended publication. -->
        <citation type="bibtex">
            @unpublished{None,
            author = {Eric Wafula},
            title = {None},
            year = {None},
            url = {https://github.com/dePamphilis/PlantTribes}
            }
        </citation>
        <citation type="doi">10.1186/1471-2105-10-421</citation>
        <citation type="doi">10.1093/molbev/msm088</citation>
        <citation type="doi">10.18637/jss.v004.i02</citation>
    </citations>
</tool>
author	greg
date	Wed, 01 Mar 2017 13:48:38 -0500
parents	ed921c8159bf
children	dab0ce7e128a