Mercurial > repos > iuc > dram_annotate
diff dram_annotate.xml @ 0:3f9129750546 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dram commit df10ba86507266a6a6f83c9bbefb7191a41b46f5
| author | iuc |
|---|---|
| date | Sat, 10 Dec 2022 21:13:10 +0000 |
| parents | |
| children | 3925138d40bf |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dram_annotate.xml Sat Dec 10 21:13:10 2022 +0000 @@ -0,0 +1,201 @@ +<tool id="dram_annotate" name="DRAM annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>metagenome-assembled-genomes (MAGs)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command detect_errors="exit_code"><![CDATA[ +#import os + +## DRAM names the genbank output based on the input_fasta file name +#set input_fasta_file_name = $os.path.splitext($os.path.basename(str($input_fasta)))[0] +#set output_gbk_file_name = $input_fasta_file_name + '.gbk' + +DRAM.py annotate +--input_fasta '$input_fasta' +--output_dir 'output_dir' +--min_contig_size $min_contig_size +--prodigal_mode '$prodigal_mode' +--trans_table $trans_table +--bit_score_threshold $bit_score_threshold +--rbh_bit_score_threshold $rbh_bit_score_threshold +$kofam_use_dbcan2_thresholds +#if str($custom_fasta_dbs_cond.custom_dbs_fasta) == 'yes': + #for additional_db in $custom_fasta_dbs_cond.custom_fasta_dbs: + --custom_db_name '$additional_db.custom_db_name' + --custom_fasta_loc '$additional_db.custom_fasta_loc' + #end for +#end if +#if str($custom_hmm_dbs_cond.custom_dbs_hmm) == 'yes': + #for additional_db in $custom_hmm_dbs_cond.custom_hmm_dbs: + --custom_hmm_name '$additional_db.custom_hmm_name' + --custom_hmm_loc '$additional_db.custom_hmm_loc' + #if $additional_db.custom_hmm_cutoffs_loc: + --custom_hmm_cutoffs_loc '$additional_db.custom_hmm_cutoffs_loc' + #end if + #end for +#end if +$use_uniref +$use_vogdb +$low_mem_mode +$skip_trnascan +--threads \${GALAXY_SLOTS:-10} +&& test -f 'output_dir/genes.faa' && mv 'output_dir/genes.faa' '$output_genes_faa' || echo 'No genes.faa output produced' +&& test -f 'output_dir/genes.fna' && mv 'output_dir/genes.fna' '$output_genes_fna' || echo 'No genes.fna output produced' +&& test -f 'output_dir/genes.gff' && mv 'output_dir/genes.gff' '$output_genes_gff' || echo 'No genes.gff output produced' +&& test -f 'output_dir/scaffolds.fna' && mv 'output_dir/scaffolds.fna' '$output_scaffolds_fna' || echo 'No scaffolds.fna output produced' +&& test -f 'output_dir/rrnas.tsv' && mv 'output_dir/rrnas.tsv' '$output_rrnas' || echo 'No rrnas.tsv output produced' +&& test -f 'output_dir/trnas.tsv' && mv 'output_dir/trnas.tsv' '$output_trnas' || echo 'No trnas.tsv output produced' +&& test -f 'output_dir/genbank/$output_gbk_file_name' && mv 'output_dir/genbank/$output_gbk_file_name' '$output_genbank' || echo 'No genbank output produced' +&& test -f 'output_dir/annotations.tsv' && mv 'output_dir/annotations.tsv' '$output_annotations' || echo 'No annotations.tsv output produced' + ]]></command> + <inputs> + <param argument="--input_fasta" type="data" format="fasta,fasta.gz" label="FASTA file"/> + <param argument="--min_contig_size" type="integer" value="2500" min="0" label="Minimum contig size to be used for gene prediction"/> + <param argument="--prodigal_mode" type="select" label="Select mode of prodigal to use for gene calling" help="Single mode requires genomes which are high quality with low contamination and long contigs (average length >3 Kbp)"> + <option value="single" selected="true">single</option> + <option value="meta">meta</option> + <option value="train">train</option> + </param> + <param argument="--trans_table" type="integer" value="11" min="1" max="25" label="Translation table for prodigal to use for gene calling"/> + <param argument="--bit_score_threshold" type="integer" value="60" min="1" label="Minimum bitScore of search to retain hits"/> + <param argument="--rbh_bit_score_threshold" type="integer" value="350" min="1" label="Minimum bitScore of reverse best hits to retain hits"/> + <param argument="--kofam_use_dbcan2_thresholds" type="boolean" truevalue="--kofam_use_dbcan2_thresholds" falsevalue="" checked="false" label="Use dbcan2 suggested HMM cutoffs for KOfam annotation instead of KOfam recommended cutoffs?"/> + <conditional name="custom_fasta_dbs_cond"> + <param name="custom_dbs_fasta" type="select" label="Specify additional fasta files against which to annotate?"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <repeat name="custom_fasta_dbs" title="FASTA file" min="1"> + <param argument="--custom_db_name" type="text" value="CF1" label="Custom FASTA database name"> + <validator type="empty_field"/> + <expand macro="sanitizer"/> + </param> + <param argument="--custom_fasta_loc" type="data" format="fasta,fasta.gz" label="Custom database FASTA file"/> + </repeat> + </when> + </conditional> + <conditional name="custom_hmm_dbs_cond"> + <param name="custom_dbs_hmm" type="select" label="Specify additional hmm files against which to annotate?"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <repeat name="custom_hmm_dbs" title="HMM file" min="1"> + <param argument="--custom_hmm_name" type="text" value="CH1" label="Custom HMM database name"> + <validator type="empty_field"/> + <expand macro="sanitizer"/> + </param> + <param argument="--custom_hmm_loc" type="data" format="hmm2,hmm3" label="Custom database HMM file"/> + <param argument="--custom_hmm_cutoffs_loc" type="data" format="tabular,tsv" optional="true" label="Custom HMM cutoffs and descriptions file" help="Optional, leave blank to ignore"/> + </repeat> + </when> + </conditional> + <param argument="--use_uniref" type="boolean" truevalue="--use_uniref" falsevalue="" checked="false" label="Annotate the input FASTA files against UniRef?" help="Increases run time and memory requirements"/> + <param argument="--use_vogdb" type="boolean" truevalue="--use_vogdb" falsevalue="" checked="false" label="Annotate the input FASTA files against VOGDB?" help="Decreases run time"/> + <param argument="--low_mem_mode" type="boolean" truevalue="--low_mem_mode" falsevalue="" checked="false" label="Skip annotating with uniref and use kofam?" help="Decreases memory requirements"/> + <param argument="--skip_trnascan" type="boolean" truevalue="--skip_trnascan" falsevalue="" checked="false" label="Skip transcan"/> + </inputs> + <outputs> + <data name="output_genes_faa" format="fasta" label="${tool.name} on ${on_string}: genes.faa"/> + <data name="output_genes_fna" format="fasta" label="${tool.name} on ${on_string}: genes.fna"/> + <data name="output_genes_gff" format="gff3" label="${tool.name} on ${on_string}: genes.gff"/> + <data name="output_scaffolds_fna" format="fasta" label="${tool.name} on ${on_string}: scaffolds"/> + <data name="output_rrnas" format="tabular" label="${tool.name} on ${on_string}: rrnas"/> + <data name="output_trnas" format="tabular" label="${tool.name} on ${on_string}: trnas"/> + <data name="output_genbank" format="genbank" label="${tool.name} on ${on_string}: genbank"/> + <data name="output_annotations" format="tabular" label="${tool.name} on ${on_string}: annotations"/> + </outputs> + <tests> + <test expect_num_outputs="8"> + <param name="input_fasta" value="input_annotate1.fasta.gz" ftype="fasta.gz"/> + <param name="prodigal_mode" value="meta"/> + <output name="output_genes_faa"> + <assert_contents> + <has_text text="rank"/> + </assert_contents> + </output> + <output name="output_genes_fna"> + <assert_contents> + <has_text text="rank"/> + </assert_contents> + </output> + <output name="output_genes_gff"> + <assert_contents> + <has_text text="gff-version 3"/> + </assert_contents> + </output> + <output name="output_scaffolds_fna"> + <assert_contents> + <has_text text="scaffold"/> + </assert_contents> + </output> + <output name="output_rrnas"> + <assert_contents> + <has_text text="scaffold"/> + </assert_contents> + </output> + <output name="output_trnas"> + <assert_contents> + <has_text text="fasta"/> + </assert_contents> + </output> + <output name="output_genbank"> + <assert_contents> + <has_text text="LOCUS"/> + </assert_contents> + </output> + <output name="output_annotations"> + <assert_contents> + <has_text text="fasta"/> + </assert_contents> + </output> + </test> + <!-- These settings require a db which doesn't exist in the test environment --> + <test expect_failure="true"> + <param name="input_fasta" value="input_annotate1.fasta.gz" ftype="fasta.gz"/> + <param name="custom_dbs_fasta" value="yes"/> + <repeat name="custom_fasta_dbs"> + <param name="custom_db_name" value="CF1"/> + <param name="custom_fasta_loc" value="annotate_custom.fasta" ftype="fasta"/> + </repeat> + <param name="custom_dbs_hmm" value="yes"/> + <repeat name="custom_hmm_dbs"> + <param name="custom_hmm_name" value="CH1"/> + <param name="custom_hmm_loc" value="annotate_custom.hmm" ftype="hmm3"/> + </repeat> + <param name="prodigal_mode" value="meta"/> + <assert_stderr> + <has_text text="returned non-zero exit status"/> + </assert_stderr> + </test> + </tests> + <help> +**What it does** + +@WHATITDOESHEADER@ + +This tool annotates genes by assigning database identifiers to them. Short contigs (default less than 2,500 bp) are initially +removed and then Prodigal is used to detect open reading frames (ORFs) and to predict their amino acid sequences. Next, DRAM +searches all amino acid sequences against multiple databases, providing a single Raw output. When gene annotation is complete, +all results are merged in a single tabular annotation table, including the best hit for each database, for user comparison. + +This tool accepts assembly-derived fastA files which may come from unbinned data (metagenome contig or scaffold files) or +genome-resolved data from one or many organisms (isolate genomes, single-amplified genome (SAGs), MAGs). + +This tool produces the following outputs. + + * A tabular file with all annotations from Pfam, UniProt, dbCAN and MEROPS databases for all genes in the input genomes + * Genbank files for each genome + * A gff file of all annotations across genomes + * A fasta file of each open reading frame amino acid sequence and best ranked annotation + * Tabular files with tRNAs and rRNAs + +@WHATITDOESFOOTER@ + </help> + <expand macro="citations"/> +</tool>
