Mercurial > repos > bebatut > rdptools
diff framebot.xml @ 0:73b3e2f98631 draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/rdptools commit fa8135b9b1918785c3f3b3fb7325bfe031b44ec4
author | bebatut |
---|---|
date | Mon, 16 Nov 2015 02:46:50 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/framebot.xml Mon Nov 16 02:46:50 2015 -0500 @@ -0,0 +1,221 @@ +<tool id="framebot" name="Framebot" version="0.1.0"> + <description> to coorect frameshift and classify nearest neighbor</description> + + <requirements> + <requirement type="package" version="2.0.2">rdptools</requirement> + </requirements> + + <stdio> + <exit_code range="1:" /> + </stdio> + + <command><![CDATA[ + java -jar \${RDP_TOOLS_DIR}/FrameBot.jar framebot + -a $alignment_mode + -i $identity_cutoff + -k $knn + -l $length_cutoff + + #if $no_metric_search.no_metric_search_test + -N + -e $no_metric_search.gap_ext_penalty + -f $no_metric_search.frameshift_penalty + -g $no_metric_search.gap_open_penalty + #else + -m $no_metric_search.max_radius + #end if + + -o output + + #if str( $databases.databases_selector ) == 'history' + $databases.databases_name + #else + #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data]) + $data_table[$databases.databases_input.value] + #end if + + $framebot_input_sequence_file + ]]></command> + + <inputs> + <param name="framebot_input_sequence_file" type="data" format="fasta" + label="Input sequence file" help=""/> + + <conditional name="databases"> + <param name="databases_selector" type="select" label="Databases to query" help=""> + <option value="cached" selected="true">Public reference gene databases</option> + <option value="history">Databases from your history</option> + </param> + <when value="cached"> + <param name="databases_input" label="Reference gene databases" type="select" display="radio"> + <options from_data_table="framebot_ref_gene_databases" /> + <validator type="no_options" message="Select at least one database"/> + </param> + </when> + <when value="history"> + <param name="databases_name" type="data" format="fasta" label="Reference gene database" + multiple="false" help=""/> + </when> + </conditional> + + <param name="alignment_mode" type="select" display="radio" + label="Alignment mode" help=""> + <option value="glocal">Glocal</option> + <option value="local">Local</option> + <option value="global">Global</option> + </param> + + <param name="identity_cutoff" type="float" min="0" max="1" value="0.4" + label="Percent identity cutoff" help=""/> + + <conditional name="no_metric_search"> + <param name="no_metric_search_test" type='boolean' checked="true" label="Disable + metric search?" help="Provide fasta file of seeds instead of + index file"/> + <when value="true"> + <param name="gap_ext_penalty" type="integer" min="-10" max="0" + value="-1" label="Gap extension penalty" help=""/> + <param name="frameshift_penalty" type="integer" min="-20" max="0" + value="-10" label="Frameshift penalty" help=""/> + <param name="gap_open_penalty" type="integer" min="-20" max="0" + value="-10" label="Gap opening penalty" help=""/> + </when> + <when value="false"> + <param name="max_radius" type="float" min="1" max="2147483647" + value="100" label="Maximum radius" help=""/> + </when> + </conditional> + + <param name="knn" type="integer" min="0" max="100" value="10" + label="The top k closest protein targets" help=""/> + <param name="length_cutoff" type="integer" min="0" max="100" value="80" + label="Length cutoff in number of amino acids" help=""/> + + <param name="transl_table" type="select" display="radio" label="Protein + translation table to use" help="NCBI Translation Tables"> + <option value="1">Standard Code</option> + <option value="2">Vertebrate Mitochondrial Code</option> + <option value="3">Yeast Mitochondrial Code</option> + <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> + <option value="5">Invertebrate Mitochondrial Code</option> + <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option> + <option value="9">Echinoderm and Flatworm Mitochondrial Code</option> + <option value="10">Euplotid Nuclear Code</option> + <option value="11" selected="true">Bacterial, Archaeal and Plant Plastid Code</option> + <option value="12">Alternative Yeast Nuclear Code</option> + <option value="13">Ascidian Mitochondrial Code</option> + <option value="14">Alternative Flatworm Mitochondrial Code</option> + <option value="16">Chlorophycean Mitochondrial Code</option> + <option value="21">Trematode Mitochondrial Code</option> + <option value="22">Scenedesmus obliquus Mitochondrial Code</option> + <option value="23">Thraustochytrium Mitochondrial Code</option> + <option value="24">Pterobranchia Mitochondrial Code</option> + <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> + </param> + + <param name="word_size" type="integer" min="3" max="6" value="4" + label="Word size used to find closest protein target" help=""/> + </inputs> + + <outputs> + <data format="txt" name="conserved_alignment_file" + from_work_dir="output_framebot.txt" + label="Conserved alignments to the neared match of ${on_string} (Framebot)" /> + <data format="fasta" name="corr_nucl_output" + from_work_dir="output_corr_nucl.fasta" + label="Frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" /> + <data format="fasta" name="corr_prot_output" + from_work_dir="output_corr_prot.fasta" + label="Frameshift-corrected protein sequences of ${on_string} (Framebot)" /> + + <data format="txt" name="failed_alignment_file" + from_work_dir="output_failed_framebot.txt" + label="Rejected alignments to the neared match of ${on_string} (Framebot)" /> + <data format="fasta" name="failed_nucl_output" + from_work_dir="output_failed_nucl.fasta" + label="Non frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" /> + </outputs> + + <tests> + <test> + <param name="framebot_input_sequence_file" value="framebot_input_sequence.fasta"/> + <param name="databases_selector" value="cached" /> + <param name="databases_input" value="amoA_prot_ref" /> + <param name="alignment_mode" value="glocal" /> + <param name="denovo_abund_cutoff" value="10" /> + <param name="denovo_id_cutoff" value="0.7" /> + <param name="identity_cutoff" value="0.4" /> + <param name="no_metric_search_test" value="true" /> + <param name="gap_ext_penalty" value="-1" label="Gap extension penalty" help=""/> + <param name="frameshift_penalty" value="-10" label="Frameshift penalty" help=""/> + <param name="gap_open_penalty" value="-10" label="Gap opening penalty" help=""/> + <param name="no_prefilter" value="false" /> + <param name="scoring_matrix" value="Blosum62"/> + <param name="knn" value="10" /> + <param name="length_cutoff" value="80"/> + <param name="transl_table" value="11"/> + <param name="word_size" value="4"/> + <param name="de_novo" value="false"/> + + <output name="conserved_alignment_file" file="framebot_conserved_alignment_file.txt"/> + <output name="corr_nucl_output" file="framebot_corr_nucl_output.fasta"/> + <output name="corr_prot_output" file="framebot_corr_prot_output.fasta"/> + <output name="failed_alignment_file" file="framebot_failed_alignment_file.txt"/> + <output name="failed_nucl_output" file="framebot_failed_nucl_output.fasta"/> + </test> + </tests> + + <help><![CDATA[ + +**What it does** + +RDP FrameBot is a frameshift correction and nearest neighbor classification tool for use with high-throughput amplicon sequencing. It uses a dynamic programming algorithm to align each query DNA sequence against a set of target protein sequences, produces frameshift-corrected protein and DNA sequences and an optimal global or local protein alignment. +More information on `Github repository <https://github.com/rdpstaff/Framebot>`_. + +----- + +**Input** + +One protein reference fasta file or index file, and one DNA query fasta file are required. + +Several reference sets for a list of genes are available. +But personal own set of reference sequences can be provide as representative of the gene of interest (`http://fungene.cme.msu.edu <http://fungene.cme.msu.edu>`_ is a good resource). +The reference set must contain protein or DNA representative sequences of the gene target and should be compiled to have a good coverage of diversity of the gene family. FrameBot is significantly more accurate when the nearest target protein sequence (from the reference set) is at least 50% identical to the query read. Running FrameBot is computationally intensive in no-metric-search mode because it performs all-against-all comparisons between query DNA and the target protein sequences. Therefore we recommend limiting your reference set to 200 protein sequences for no-metric-search mode. The index metic-search mode gains more than 10-fold speedup by reducing the number of comparisons (see FrameBot citation). A larger DNA reference set can be used. + +----- + +**Parameters** + +The parameters are numerous in Framebot + + - The alignment mode: glocal, local or global + - The minimum abundance for de-novo mode + - The maxmimum aa identity cutoff for de-novo mode + - The Percent identity cutoff + - The top k closest protein targets + - Length cutoff in number of amino acids + - Disable metric search (provide fasta file of seeds instead of index file) + - Result file name stem + - Disable the pre-filtering step for non-metric search + - Sequence quality data + - Protein translation table to use + - The word size used to find closest protein targets + - ... + +----- + +**Output** + +The framebot step produces five output files: + + - the alignment to the nearest match satisfying the minimum length and protein identity cutoff. + - the frameshift-corrected nucleotide and protein sequences satisfying the minimum length and protein identity cutoff. + - the alignment to the nearest match that failed the minimum length and protein identity cutoff. + - a fasta file containing the nucleotide sequences that failed the minimum length and protein identity cutoff. + + ]]></help> + + <citations> + <citation type="doi">10.1128/mBio.00592-13</citation> + </citations> +</tool> \ No newline at end of file