Mercurial > repos > greg > kaks_analysis
comparison kaks_analysis.xml @ 0:ef6cd36613a8 draft
Uploaded
author | greg |
---|---|
date | Wed, 01 Mar 2017 10:29:54 -0500 |
parents | |
children | 431e4f3487a7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ef6cd36613a8 |
---|---|
1 <tool id="plant_tribes_kaks_analysis" name="KaKs analysis" version="0.7.0"> | |
2 <description>of orthologous or paralogous coding sequences</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.7">plant_tribes_kaks_analysis</requirement> | |
5 </requirements> | |
6 <stdio> | |
7 <!-- Anything other than zero is an error --> | |
8 <exit_code range="1:" /> | |
9 <exit_code range=":-1" /> | |
10 <!-- In case the return code has not been set properly check stderr too --> | |
11 <regex match="Error:" /> | |
12 <regex match="Exception:" /> | |
13 </stdio> | |
14 <command> | |
15 <![CDATA[ | |
16 #if str($options_type.options_type_selector) == 'advanced': | |
17 #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond | |
18 #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select | |
19 #set fit_components_cond = $options_type.fit_components_cond | |
20 #set fit_components = $fit_components_cond.fit_components | |
21 #end if | |
22 KaKsAnalysis ## flags below are assembled from the tool inputs | |
23 --config_dir '$scaffold.fields.path' ## NOTE(review): no 'scaffold' param is declared in <inputs> - confirm where it should come from | |
24 --num_threads \${GALAXY_SLOTS:-4} ## backslash-escaped so the shell, not Cheetah, expands it | |
25 --coding_sequences_species_1 '$coding_sequences_species_1' | |
26 --proteins_species_1 '$proteins_species_1' | |
27 --comparison $comparison_cond.comparison | |
28 #if str($comparison_cond.comparison) == 'orthologs': | |
29 --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2' | |
30 --proteins_species_2 '$comparison_cond.proteins_species_2' | |
31 #end if | |
32 #if str($options_type.options_type_selector) == 'advanced': | |
33 --min_coverage $options_type.min_coverage | |
34 --recalibration_rate $options_type.recalibration_rate | |
35 #if str($codeml_ctl_file_select) == 'yes': | |
36 --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file' | |
37 ## No else block needed here because the default codeml_ctl config | |
38 ## will be used if the --codeml_ctl_file flag is missing. | |
39 #end if | |
40 #if str($fit_components) == 'yes': | |
41 --num_of_components $fit_components_cond.num_of_components | |
42 --min_ks $fit_components_cond.min_ks | |
43 --max_ks $fit_components_cond.max_ks | |
44 #end if | |
45 #end if | |
46 ]]> | |
47 </command> | |
48 <inputs> | |
49 <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the species" /> | |
50 <param name="proteins_species_1" format="fasta" type="data" label="Amino acids (FAA) sequences fasta file for the species" /> | |
51 <conditional name="comparison_cond"> <!-- second-species inputs are only offered for the orthologs choice --> | |
52 <param name="comparison" type="select" label="Select method for pairwise sequence comparison to determine homologous pairs" help="Cross species comparison requires selection of inputs for second species"> | |
53 <option value="paralogs" selected="true">Self species comparison</option> | |
54 <option value="orthologs">Cross species comparison</option> | |
55 </param> | |
56 <when value="paralogs" /> | |
57 <when value="orthologs"> | |
58 <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the second species" /> | |
59 <param name="proteins_species_2" format="fasta" type="data" label="Amino acids (FAA) sequences fasta file for the second species" /> | |
60 </when> | |
61 </conditional> | |
62 <conditional name="options_type"> <!-- the settings below are only offered when Advanced is selected --> | |
63 <param name="options_type_selector" type="select" label="Options Configuration"> | |
64 <option value="basic" selected="true">Basic</option> | |
65 <option value="advanced">Advanced</option> | |
66 </param> | |
67 <when value="basic" /> | |
68 <when value="advanced"> | |
69 <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="Minimum sequence pairwise coverage length between homologous pairs" /> | |
70 <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species" /> | |
71 <conditional name="codeml_ctl_file_cond"> <!-- optional user-supplied codeml control file --> | |
72 <param name="codeml_ctl_file_select" type="select" label="Select PAML codeml control file?" help="Used for ML analysis of protein-coding DNA sequences using codon substitution models, select No to use the default control file"> | |
73 <option value="no" selected="true">No</option> | |
74 <option value="yes">Yes</option> | |
75 </param> | |
76 <when value="no" /> | |
77 <when value="yes"> | |
78 <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" /> | |
79 </when> | |
80 </conditional> | |
81 <conditional name="fit_components_cond"> <!-- optional mixture model settings for the ks distribution --> | |
82 <param name="fit_components" type="select" label="Fit a mixture model of multivariate normal components to synonymous (ks) distribution?" help="Used to identify significant duplication events in a genome"> | |
83 <option value="no" selected="true">No</option> | |
84 <option value="yes">Yes</option> | |
85 </param> | |
86 <when value="no" /> | |
87 <when value="yes"> | |
88 <param name="num_of_components" type="integer" value="0" min="0" label="Number of components to fit to synonymous substitutions (ks) distribution" /> | |
89 <param name="min_ks" type="float" value="0.0" min="0.0" label="Lower limit of synonymous substitutions (ks)" help="Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome" /> | |
90 <param name="max_ks" type="float" value="0.0" min="0.0" label="Upper limit of synonymous substitutions (ks)" help="Excludes likely ancient paralogous pairs" /> | |
91 </when> | |
92 </conditional> | |
93 </when> | |
94 </conditional> | |
95 </inputs> | |
96 <outputs> | |
97 <collection type="list" name="ks_files"> <!-- list collection filled from the files discovered in kaksAnalysis_dir --> | |
98 <discover_datasets directory="kaksAnalysis_dir" pattern="__name__" ext="fasta" visible="false" /> | |
99 </collection> | |
100 </outputs> | |
101 <tests> <!-- TODO: the single test below is empty: it sets no params and asserts no outputs --> | |
102 <test> | |
103 </test> | |
104 </tests> | |
105 <help> | |
106 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of | |
107 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool performs orthologous | |
108 or paralogous synonymous substitution (ks) analyses of coding sequences and amino acid sequences. | |
109 | |
110 ----- | |
111 | |
112 **Options** | |
113 | |
114 * **Required options** | |
115 | |
116 - **Coding sequences (CDS) fasta file for the species** - Fasta file of coding sequences (CDS) for the species. | |
117 - **Amino acids (FAA) sequences fasta file for the species** - Fasta file of amino acid sequences for the species. | |
118 - **Select method for pairwise sequence comparison to determine homologous pairs** - Either 'Self species comparison' (paralogs) or 'Cross species comparison' (orthologs). Cross species comparison requires selection of inputs for the second species. | |
119 | |
120 * **Cross species comparison options** | |
121 | |
122 - **Coding sequences (CDS) fasta file for the second species** - Fasta file of coding sequences (CDS) for the second species. | |
123 - **Amino acids (FAA) sequences fasta file for the second species** - Fasta file of amino acid sequences for the second species. | |
124 | |
125 * **Advanced options** | |
126 | |
127 - **Minimum sequence pairwise coverage length between homologous pairs** - Value between 0.3 and 1.0; the default is 0.5. | |
128 - **Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species** - Non-negative value; the default is 0.0. | |
129 | |
130 * **PAML codeml options** | |
131 | |
132 - **Select PAML codeml control file?** - Used for ML analysis of protein-coding DNA sequences using codon substitution models. Select 'No' to use the default control file. | |
133 - **PAML codeml control file** - Control file to use when 'Yes' is selected for the previous option. | |
134 | |
135 * **Mixture model options** | |
136 | |
137 - **Fit a mixture model of multivariate normal components to synonymous (ks) distribution?** - Used to identify significant duplication events in a genome. | |
138 - **Number of components to fit to synonymous substitutions (ks) distribution** - Non-negative integer; the default is 0. | |
139 - **Lower limit of synonymous substitutions (ks)** - Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome. | |
140 - **Upper limit of synonymous substitutions (ks)** - Excludes likely ancient paralogous pairs. | |
141 | |
142 </help> | |
143 <citations> <!-- NOTE(review): the bibtex entry below still has None as its key, title and year - fill in before release --> | |
144 <citation type="bibtex"> | |
145 @unpublished{None, | |
146 author = {Eric Wafula}, | |
147 title = {None}, | |
148 year = {None}, | |
149 url = {https://github.com/dePamphilis/PlantTribes} | |
150 } | |
151 </citation> | |
152 <citation type="doi">10.1186/1471-2105-10-421</citation> | |
153 <citation type="doi">10.1093/molbev/msm088</citation> | |
154 <citation type="doi">10.18637/jss.v004.i02</citation> | |
155 </citations> | |
156 </tool> |