Mercurial > repos > iuc > cami_amber

diff cami_amber.xml @ 0:c2162f17ceb7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/amber/ commit 91090b1565baeddbd3c96e74bc08a37990fafd3a
author: iuc
date: Sun, 25 Aug 2024 13:17:46 +0000
children: e166d7bc8fb5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cami_amber.xml	Sun Aug 25 13:17:46 2024 +0000
@@ -0,0 +1,465 @@
+<tool id="cami_amber" name="CAMI AMBER" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Evaluation package for MAGs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        #set $path_to_html = $html.files_path
+        #set $max = []
+        #set $min = []
+        #set $label = []
+
+        mkdir -p output inputs '$path_to_html' &&
+
+        #if $tox.ncbi.is_select == 'yes':
+            #if $tox.input.is_select == 'manually':
+                mkdir -p ncbi &&
+                #for $file in $tox.ncbi_dir:
+                    ln -s '$file' './ncbi/$file.element_identifier' &&
+                #end for
+            #end if
+        #end if
+
+        #for $i, $file in enumerate($input_files):
+            ln -s '$file.binning_files' './inputs/$file.binning_files.element_identifier' &&
+            #if $file.labels:
+                $label.append($file.labels)
+            #end if 
+        #end for
+
+        #if $genome.thresholds:
+            #for $i, $arg in enumerate($genome.thresholds):
+                #if $arg.min_completeness:
+                    $min.append($arg.min_completeness)
+                #end if
+                #if $arg.max_contamination:
+                    $max.append($arg.max_contamination)
+                #end if
+            #end for
+        #end if
+
+        amber.py
+        -g '${gold_standard_file}'
+        #if $label:
+            #set $sep = ''
+            -l
+            '
+            #for $lab in $label:
+                $sep$lab
+                #set $sep = ','
+            #end for
+            '
+        #end if
+        -p ${filter}
+        #if $min_length:
+            -n $min_length
+        #end if
+        #if $desc:
+            -d '${desc}'
+        #end if
+        #if $min:
+            #set $sep = ''
+            --min_completeness
+            '
+            #for $i in $min:
+                $sep$i
+                #set $sep = ','
+            #end for
+            '
+        #end if
+        #if $max:
+            #set $sep = ''
+            --max_contamination
+            '
+            #for $i in $max:
+                $sep$i
+                #set$sep = ','
+            #end for
+            '
+        #end if
+        #if $genome.remove_genomes:
+            -r '$genome.remove_genomes'
+        #end if
+        #if $genome.remove.is_select == 'yes':
+            -k '$genome.keyword'
+        #end if
+        #if $genome.genome_coverage:
+            --genome_coverage '$genome.genome_coverage'
+        #end if
+        #if $tox.ncbi.is_select == 'yes':
+            #if $tox.input.is_select == 'manually':
+                --ncbi_dir ncbi
+            #else:
+                --ncbi_dir '$tox.ncbi_dir.fields.path'
+            #end if
+        #end if
+        -o output
+        #for $i, $bin in enumerate($input_files):
+            'inputs/$bin.binning_files.element_identifier'
+        #end for
+
+        &&
+
+        mv 'output/heatmap_bar.png' '$path_to_html'
+
+    ]]>
+    </command>
+    <inputs>
+        <param argument="--gold_standard_file" format="tabular" type="data"
+            label="Mapping of contigs or reads"
+            help="Input the gold standard file here so amber know the correct IDs for each contig/read" />
+        <repeat name="input_files" title="Binning files and names "
+            help="Enter multiple binning files and names (names are optional). IMPORTANT: for each binning file you use in the program you need to state one label, this mean for example for 3 binning files you need 3 labels (3 slots) not more or less!">
+            <param name="binning_files" format="tabular" type="data"
+                label="Input bin files here" /> 
+            <param argument="--labels" type="text" value="" optional="true"
+                label="Name for bin" />
+        </repeat>
+        <param argument="--filter" type="integer" value="0" min="0"
+            label="Filter out the n smallest genome bins"
+            help="Optional filter for filter out the n smallest genome bins" />
+        <param argument="--min_length" type="integer" value="" optional="true"
+            label="Minimum length of sequences"
+            help="Input how long the sequences has to be" />
+        <param argument="--desc" type="text" value=""
+            label="HTML description"
+            help="Enter the HTML page description here" />
+        <section name="genome" title="Genome binning-specific options" >
+            <repeat name="thresholds" title="Min. completeness and max. contamination thresholds" 
+                help="Enter certain thresholds for min. completeness (Default %: 50,70,90) and certain thresholds for max. contamination (Default %: 5, 10), the program itself will transform it to %!" >
+                <param argument="--min_completeness" type="integer" value="" min="0" max="100" optional="true"
+                    label="Min. completeness threshold" />
+                <param argument="--max_contamination" type="integer" value="" min="0" max="100" optional="true"
+                    label="Max. contamination threshold" />
+            </repeat>
+            <param argument="--remove_genomes" type="data" format="tabular" optional="true" 
+                label="tsv file for genomes to remove"
+                help="Input a tsv file with binid and type in each line. In the help section is an example. WARNING: IF THE LIST CONTAIN ALL GENOME THE PROGRAM WILL FAIL!" />
+            <conditional name="remove">
+                <param name="is_select" type="select" label="Remove one or all genomes which are in the list?"
+                    help="Select yes and enter a keyword to remove certain type of genomes which are in the list. When all genomes in the list should remove also select yes and do not enter a keyword!">
+                    <option value="yes" selected="false">Yes</option>
+                    <option value="no" selected="true">No</option>
+                </param>
+                <when value="yes">
+                    <param argument="--keyword" type="text" value="" 
+                        label="Keyword for removing certain genomes"
+                        help="Input a keyword which should be match with binid giving in the file for removing genomes. When no keyword is giving the program remove all genomes which are in the list!" />
+                </when>
+                <when value="no" />
+            </conditional>
+            <param argument="--genome_coverage" type="data" format="tabular" optional="true"
+                label="Genome coverages tsv file"
+                help="Input a tsv file where the genome coverage is stated. Look at the help section to see how this file should look like!" />
+        </section>
+        <section name="tox" title="taxonomic binning-specific option">
+            <conditional name="ncbi">
+                <param name="is_select" type="select" label="Want to use the NCBI database?"
+                    help="Select yes if you want to use the NCBI database to evaluate taxonomic binning">
+                    <option value="yes" selected="false">Yes</option>
+                    <option value="no" selected="true">No</option>
+                </param>
+                <when value="yes"> 
+                    <conditional name="input">
+                        <param name="is_select" type="select" label="Select how you want to use NCBI database"
+                            help="Either select manually input or data manager. For more help read the help of the type of the input you selected." >
+                            <option value="manually">Manually</option>
+                            <option value="data">Data manager</option>
+                        </param>
+                        <when value="manually">
+                            <param argument="--ncbi_dir" format="tabular" type="data" multiple="true"
+                                label="Input .dmp files"
+                                help="To use the NCBI database we need to provide followed .dmp files: nodes.dmp, names.dmp and merged.dmp. You can get the via download of this file **ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz** and unzip it!" />
+                        </when>
+                        <when value="data" >  
+                            <param argument="--ncbi_dir" type="select"
+                                label="Assessing taxonomic binning"
+                                help="Include the NCBI taxonomy database. For this you can use the data manager data_manager_fetch_ncbi_taxonomy which can be install via galaxy. For more help look at the help section at the bottom!" >
+                                <options from_data_table="ncbi_taxonomy">
+                                    <validator message="No NCBI database is available" type="no_options"/>
+                                </options>
+                            </param>
+                        </when>
+                    </conditional> 
+                </when>
+                <when value="no"/>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="html" format="html" from_work_dir="output/index.html" label="${tool.name}: HTML" />
+        <data name="result" format="tabular" from_work_dir="output/results.tsv" label="${tool.name}: Results" />
+        <data name="metrics_genome" format="tabular" from_work_dir="output/genome_metrics_cami1.tsv" label="${tool.name}: Genome metrics" />
+        <data name="metrics_bin" format="tabular" from_work_dir="output/bin_metrics.tsv" label="${tool.name}: Bin metrics" />
+    </outputs>
+    <tests>
+        <test expect_num_outputs="4">
+            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
+            <repeat name="input_files">
+                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
+            </repeat>
+            <output name="metrics_bin">
+                <assert_contents>
+                    <has_text text="genome_id" n="1" />
+                    <has_text text="sample_id" n="1" />
+                </assert_contents>
+            </output> 
+            <output name="metrics_genome">
+                <assert_contents>
+                    <has_text text="precision_seq" n="1" />
+                    <has_text text="total_length" n="1" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
+            <repeat name="input_files">
+                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
+                <param name="labels" value="test1" />
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
+                <param name="labels" value="test2" />
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
+                <param name="labels" value="test3" />
+            </repeat>
+            <param name="filter" value="1" />
+            <param name="min_length" value="200" />
+            <section name="genome" >
+                <repeat name="thresholds" >
+                    <param name="max_contamination" value="2" />
+                    <param name="min_completeness" value="50" />
+                </repeat>
+                <repeat name="thresholds" >
+                    <param name="min_completeness" value="70" />
+                </repeat>
+                <repeat name="thresholds" >
+                    <param name="min_completeness" value="90" />
+                </repeat>
+                <param name="genome_coverage" value="cami2_mouse_gut_average_genome_coverage.tsv" ftype="tabular" />
+                <param name="remove_genomes" value="unique_common.tsv" ftype="tabular" />
+                <conditional name="remove" >
+                    <param name="is_select" value="yes" />
+                    <param name="keyword" value="circular element" />
+                </conditional>
+            </section>
+            <section name="tox">
+                <conditional name="ncbi">
+                    <param name="is_select" value="yes" />
+                    <conditional name="input" >
+                        <param name="is_select" value="manually" />
+                        <param name="ncbi_dir" value="test-db/nodes.dmp,test-db/merged.dmp,test-db/names.dmp" ftype="tabular" />
+                    </conditional> 
+                </conditional>
+            </section> 
+            <output name="result">
+                <assert_contents>
+                    <has_text text="test1" n="1" />
+                    <has_text text="test2" n="1" />
+                    <has_text text="test3" n="1" />
+                </assert_contents>
+            </output> 
+        </test>
+        <test expect_num_outputs="4">
+            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
+            <repeat name="input_files">
+                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
+            </repeat>
+            <param name="desc" value="TEST FOR GALAXY" />
+            <section name="tox">
+                <conditional name="ncbi">
+                    <param name="is_select" value="yes" />
+                    <conditional name="input" >
+                        <param name="is_select" value="data" />
+                        <param name="ncbi_dir" value="test-db-tox" />
+                    </conditional> 
+                </conditional>
+            </section> 
+            <output name="html">
+                <assert_contents>
+                    <has_text text="TEST FOR GALAXY" n="1" />
+                </assert_contents>
+            </output>
+        </test><test expect_num_outputs="4">
+            <param name="gold_standard_file" value="test_gold.tsv" ftype="tabular" />
+            <repeat name="input_files">
+                <param name="binning_files" value="test_binning.tsv" ftype="tabular"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="binning_files" value="test_binning2.tsv" ftype="tabular"/>
+            </repeat>
+            <section name="tox">
+                <conditional name="ncbi">
+                    <param name="is_select" value="yes" />
+                    <conditional name="input" >
+                        <param name="is_select" value="data" />
+                        <param name="ncbi_dir" value="test-db-tox" />
+                    </conditional> 
+                </conditional>
+            </section> 
+            <output name="metrics_bin">
+                <assert_contents>
+                    <has_text text="genome_id" n="1" />
+                    <has_text text="sample_id" n="1" />
+                </assert_contents>
+            </output> 
+            <output name="metrics_genome">
+                <assert_contents>
+                    <has_text text="precision_seq" n="1" />
+                    <has_text text="total_length" n="1" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+        .. class:: infomark
+
+        **What is AMBER**
+
+        AMBER is an evaluation package for the comparative assessment of genome reconstructions and taxonomic assignments from metagenome benchmark datasets.
+
+        .. class:: infomark
+
+        **What it does**
+
+        AMBER calculate multiple metrics per bin and multiple metrics per sample. Each of them are provided then in results rankings, and comparative visualizations for assessing multiple programs or parameter effects.
+
+        For more information please visit `AMBER in GitHub <https://github.com/CAMI-challenge/AMBER>`_.
+
+        **Input**
+
+        AMBER use only 2 required inputs:
+
+        - The golden standard file (biobox format)
+
+        This file can be created via the add_length tool
+
+        .. class:: infomark
+
+        Example(tab separated)
+
+        ::
+        
+         @Version:0.9.1
+         @SampleID:CAMI_low
+
+         @@SEQUENCEID	BINID	_LENGTH
+         RL|S1|C10817	Sample18_57	20518
+         RL|S1|C11497	Sample22_57	37672
+         RL|S1|C6571	evo_1286_AP.033	69914
+         RL|S1|C10560	evo_1286_AP.033	995657
+         RL|S1|C13546	evo_1286_AP.033	626775
+
+        Note: This file looks similar to the binning files but the only different which is also is important is the length column.
+
+        - Multiple binning files (biobox format)
+
+        Files can be created via the convert_to_biobox tool
+
+        .. class:: infomark
+
+        Example(tab separated):
+
+        ::
+
+         #CAMI Format for Binning
+         @Version:0.9.0
+         @SampleID:CAMI_low
+         @@SEQUENCEID	BINID
+         RL|S1|C10	Bin_034
+         RL|S1|C100	Bin_023
+         RL|S1|C1000	Bin_034
+         RL|S1|C10000	Bin_019
+         RL|S1|C10002	Bin_035
+         RL|S1|C10004	Bin_035
+         RL|S1|C10008	Bin_034
+         RL|S1|C10011	Bin_035
+         RL|S1|C10012	Bin_013
+         RL|S1|C10014	Bin_035
+
+        There are also additional inputs which can be used:
+
+        - A genome list which should be removed(tabular format)
+
+        .. class:: infomark
+
+        Example(tab separated): 
+
+        ::
+
+         evo_1035930.029	common strain
+         1035930	common strain
+         evo_1035930.032	common strain
+         evo_1035930.011	common strain
+         evo_1286_AP.033	common strain
+         1286_AP	common strain
+         evo_1286_AP.026	common strain
+         evo_1286_AP.037	common strain
+         evo_1286_AP.008	common strain
+         1052944	common strain
+         1053058	common strain
+         1052947	common strain
+         evo_1049056.013	common strain
+         evo_1049056.031	common strain
+         evo_1049056.011	common strain
+         1049056	common strain
+         evo_1049056.039	common strain
+
+        Note: The first column contain the BINID and the second contain any kind of string. IMPORTANT: The argument where to state a keyword has to match to the anything in the second column to filter these kind of genomes out. If there is no keyword stated it can happen if the remove list contain all genomes which should be used then AMBER will fail since there are no genomes left to use!
+
+        - Genome coverage file (tabular format)
+
+        .. class:: infomark
+
+        Example(tab separated):
+
+        ::
+
+         @SampleID:gsa_pooled
+         @@GENOMEID	COVERAGE
+         4378740.0	82.85111527272727
+         4378740.1	27.159305090909097
+         denovo10559.0	2.1596957142857143
+         179927.0	1.6946866666666667
+         denovo8373.1	2.07144
+         136604.0	9.150489565217391
+         denovo8373.0	1.1413460000000002
+         269378.0	8.051563333333332
+         190114.0	18.253119629629627
+         228140.0	3.078681818181818
+         135956.0	121.52672015625001
+         259846.0	12.298210588235296
+         162576.0	9.57867191489362
+         184966.0	14.461031521739134
+
+        There is also an option to include the NCBI database in AMBER. This can be used when including the link which is stated in the data manager: **data_manager_fetch_ncbi_taxonomy**. This data manager download the current version of the database and store all files for you. If there are questions about data manager maybe have a look at this `Tutorial <https://usegalaxy.eu/training-material/topics/admin/tutorials/reference-genomes/tutorial.html>`_.
+
+        **Output**
+
+        AMBER will output 3 tsv files where each metrics value is stated. The important output is the HTML file where all data are included and also can be visualized with certain plots!
+
+        **Additional information**
+
+        The package **Bokeh** will create warnings which can be ignored since the only tell you that certain functions in the code are swapped with the new functions. AMBER was tested with the stated version of **Bokeh** and with the newest version of it and both generate the same output!
+    ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
author	iuc
date	Sun, 25 Aug 2024 13:17:46 +0000
parents
children	e166d7bc8fb5