Mercurial > repos > pimarin > recentrifuge
view recentrifuge.xml @ 2:b135c5908e8c draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/recentrifuge commit 000b196a8781301582ee706ab287f65f27478a12-dirty"
author | pimarin |
---|---|
date | Wed, 06 Apr 2022 13:52:48 +0000 |
parents | e5474449c35d |
children | 2890083b1a84 |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy wrapper for Recentrifuge (rcf).

    The command template builds a single `rcf` call from four input sections
    (input file + classifier type, taxonomy database, output options,
    coarse/fine tuning options) and redirects stdout/stderr into the `logfile`
    output. Tokens (@TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@) and the
    xrefs / requirements / version_command / citations macros are imported
    from macro.xml.
-->
<tool id="recentrifuge" name="Recentrifuge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Robust comparative analysis and contamination removal for metagenomics</description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
## database input ##
## cached: use the path column of the selected data table entry ##
## history: symlink every selected dataset under its element identifier into ./rcf_db ##
#if $database.db_type.db_select == "cached"
    #set $rcf_db = $database.db_type.cached_db.fields.path
#else if $database.db_type.db_select == "history"
    mkdir rcf_db &&
    #for $i in $database.db_type.history_db
        ln -s '$i' 'rcf_db/$i.element_identifier' &&
    #end for
    #set $rcf_db = "rcf_db"
#end if

rcf
-n '$rcf_db'

## input type ##
#if $input_option.file_type.filetype == "centrifuge"
    -f '$input_option.input_file'
#else if $input_option.file_type.filetype == "lmat"
    -l '$input_option.input_file'
#else if $input_option.file_type.filetype == "clark"
    -r '$input_option.input_file'
#else if $input_option.file_type.filetype == "kraken"
    -k '$input_option.input_file'
#else if $input_option.file_type.filetype == "generic"
    -g '$input_option.input_file'
    --format '$input_option.file_type.format'
#end if

## output option ##
## the fixed prefix "output" is what the from_work_dir attributes of the outputs rely on ##
-e '$output_option.extra'
-o output
$output_option.pickle
$output_option.nohtml

## advanced options ##
#if $advanced_option.control_select.controls_type == "add_neg"
    --controls '$advanced_option.control_select.controls'
#end if
#if $advanced_option.scoring != "DEFAULT"
    --scoring '$advanced_option.scoring'
#end if
#if $advanced_option.minscore_select.minscore == "specify_minscore"
    --minscore '$advanced_option.minscore_select.minscore_value'
#end if
#if $advanced_option.mintaxa_type.mintaxa_select == "specify_mintaxa"
    --mintaxa '$advanced_option.mintaxa_type.mintaxa'
#end if
#if $advanced_option.exclude_taxa_type.exclude_taxa_select == "yes_exclude"
    --exclude '$advanced_option.exclude_taxa_type.exclude_taxa_name'
#end if
#if $advanced_option.include_taxa_type.include_taxa_select == "yes_include"
    --include '$advanced_option.include_taxa_type.include_taxa_name'
#end if
$advanced_option.avoidcross

## MORE ADVANCED OPTION ##
#if $more_advanced_option.minscore_type.minscore_select == "specify_minscore"
    --ctrlminscore '$more_advanced_option.minscore_type.ctrlminscore'
#end if
#if $more_advanced_option.ctrlmintaxa_type.ctrlmintaxa_select == "specify_ctrlmintaxa"
    --ctrlmintaxa '$more_advanced_option.ctrlmintaxa_type.ctrlmintaxa'
#end if
--summary '$more_advanced_option.summary'
$more_advanced_option.takeoutroot
$more_advanced_option.nokollapse
$more_advanced_option.strain
$more_advanced_option.sequential
$more_advanced_option.debug
$more_advanced_option.version

## LOG FILE OUTPUT ##
&> '$logfile'
    ]]></command>
    <inputs>
        <!-- INPUT FILES -->
        <section name="input_option" title="Input options" expanded="true">
            <param name="input_file" type="data" format="tabular" label="Select taxonomy file tabular formated"/>
            <!-- The selected classifier decides which rcf input flag (-f/-l/-r/-k/-g) is emitted -->
            <conditional name="file_type">
                <param name="filetype" type="select" label="Type of input file (centrifuge, CLARK, Generic, kraken, LMAT)" help="(-f, -r, -g, -k, -l)">
                    <option value="centrifuge">Centrifuge</option>
                    <option value="clark">CLARK</option>
                    <option value="generic">Generic</option>
                    <option value="lmat">LMAT</option>
                    <option value="kraken">Kraken</option>
                </param>
                <when value="centrifuge"/>
                <when value="lmat"/>
                <when value="clark"/>
                <when value="kraken"/>
                <when value="generic">
                    <param argument="--format" type="text" label="Format of the output files from a generic classifier" help="string like 'TYP:csv,TID:1,LEN:3,SCO:6,UNC:0' where valid file TYPes are csv/tsv/ssv, and the rest of fields indicate the number of column used (starting in 1) for the TaxIDs assigned,the LENgth of the read, the SCOre given to the assignment (--format)"/>
                </when>
            </conditional>
        </section>
        <!-- taxa databases -->
        <section name="database" title="Database type" expanded="true">
            <conditional name="db_type">
                <param name="db_select" type="select" label="Cached database with clade-specific marker genes">
                    <option value="cached" selected="true">Locally installed</option>
                    <option value="history">From history</option>
                </param>
                <when value="cached">
                    <param name="cached_db" type="select" label="Cached database whith taxa ID">
                        <options from_data_table="rcf_database">
                            <validator message="No recentrifuge database is available" type="no_options"/>
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="history_db" type="data" multiple="true" format="txt" label="Database from history"/>
                </when>
            </conditional>
        </section>
        <!-- output name -->
        <section name="output_option" title="Output options">
            <param argument="--extra" type="select" label="Type of extra output to be generated (default on CSV)" help="(--extra)">
                <option value="CSV" selected="true">CSV</option>
                <option value="DYNOMICS">DYNOMICS</option>
                <option value="FULL">FULL</option>
                <option value="MULTICSV">MULTICSV</option>
                <option value="TSV">TSV</option>
            </param>
            <param argument="--pickle" type="boolean" truevalue="--pickle" falsevalue="" label="Serialize statistics and data results in pandas DataFrames" help="(--pickle)"/>
            <param argument="--nohtml" type="boolean" truevalue="--nohtml" falsevalue="" label="Suppress saving the HTML output file" help="(--nohtml)"/>
        </section>
        <!-- ADVANCED OPTIONS -->
        <section name="advanced_option" title="Coarse tuning of algorithm parameters">
            <conditional name="control_select">
                <param name="controls_type" type="select" label="Number of first samples will be treated as negative controls (default is 0)" help="(--controls)">
                    <option value="default">No control</option>
                    <option value="add_neg">Add negative controls </option>
                </param>
                <when value="default"/>
                <when value="add_neg">
                    <param name="controls" type="integer" min="0" value="0" label="Number of samples"/>
                </when>
            </conditional>
            <!-- "DEFAULT" is a wrapper-only value: the command omits the scoring flag for it -->
            <param name="scoring" type="select" label="Type of scoring to be applied" help="(--scoring)">
                <option value="DEFAULT" selected="true">Default scoring</option>
                <option value="SHEL">SHEL</option>
                <option value="LENGTH">LENGTH</option>
                <option value="LOGLENGTH">LOGLENGTH</option>
                <option value="NORMA">NORMA</option>
                <option value="LMAT">LMAT</option>
                <option value="CLARK_C">CLARK_C</option>
                <option value="CLARK_G">CLARK_G</option>
                <option value="KRAKEN">KRAKEN</option>
                <option value="GENERIC">GENERIC</option>
            </param>
            <conditional name="minscore_select">
                <param name="minscore" type="select" label="minimum score/confidence of the classification of a read to pass the quality filter; all pass by default" help="(--minscore)">
                    <option value="default" selected="true">Default all pass</option>
                    <option value="specify_minscore">Specify value</option>
                </param>
                <when value="default"/>
                <when value="specify_minscore">
                    <param name="minscore_value" type="integer" min="0" value="0" label="minimum score/confidence value"/>
                </when>
            </conditional>
            <conditional name="mintaxa_type">
                <param name="mintaxa_select" type="select" label="Minimum taxa to avoid collapsing one level into the parent (if not specified a value will be automatically assigned)" help="(--mintaxa)">
                    <option value="default" selected="true">Automatically assigned</option>
                    <option value="specify_mintaxa">Choose value</option>
                </param>
                <when value="default"/>
                <when value="specify_mintaxa">
                    <param name="mintaxa" type="integer" min="0" value="0" label="Minimum taxa number"/>
                </when>
            </conditional>
            <conditional name="exclude_taxa_type">
                <param name="exclude_taxa_select" type="select" label="NCBI taxid code to exclude a taxon and all underneath (default, no exclude)" help="(--exclude)">
                    <option value="no_exclude">No exclusion</option>
                    <option value="yes_exclude">Specify excluded taxa</option>
                </param>
                <when value="yes_exclude">
                    <param name="exclude_taxa_name" type="text" label="NCBI taxid code to exclude"/>
                </when>
                <when value="no_exclude"/>
            </conditional>
            <conditional name="include_taxa_type">
                <param name="include_taxa_select" type="select" label="NCBI taxid code to include a taxon and all underneath" help="(--include)">
                    <option value="no_include">Default no taxa include</option>
                    <option value="yes_include">Specify included taxa</option>
                </param>
                <when value="yes_include">
                    <param name="include_taxa_name" type="text" label="NCBI taxid code to include"/>
                </when>
                <when value="no_include"/>
            </conditional>
            <!-- NOTE(review): the usage text in the help lists -a and -c as alternatives ([-a | -c CONTROLS_NUMBER]);
                 confirm how rcf reacts when both this flag and a non-zero number of controls are given. -->
            <param argument="--avoidcross" type="boolean" truevalue="--avoidcross" falsevalue="" label="Avoid cross analysis" help="(--avoidcross)"/>
        </section>
        <!-- Detailed more fine parameters -->
        <section name="more_advanced_option" title=" Fine tuning of algorithm parameters">
            <conditional name="minscore_type">
                <param name="minscore_select" type="select" label="minimum score/confidence of the classification of a read in control samples to pass the quality filter; it defaults to minscore" help="(--ctrlminscore)">
                    <option value="default_minscore">Default minscore</option>
                    <option value="specify_minscore">Specify minscore</option>
                </param>
                <when value="default_minscore"/>
                <when value="specify_minscore">
                    <param name="ctrlminscore" type="integer" value="0" label="minimum score/confidence"/>
                </when>
            </conditional>
            <conditional name="ctrlmintaxa_type">
                <param name="ctrlmintaxa_select" type="select" label="Minimum taxa to avoid collapsing one level into the parent" help="(--ctrlmintaxa)">
                    <option value="default_ctrlmintaxa">Default value</option>
                    <option value="specify_ctrlmintaxa">Specify minimum taxa number</option>
                </param>
                <when value="default_ctrlmintaxa"/>
                <when value="specify_ctrlmintaxa">
                    <param name="ctrlmintaxa" type="integer" value="0" label="Minimum taxa number"/>
                </when>
            </conditional>
            <param name="summary" type="select" label="select to 'add' summary samples to other samples, or to 'only' show summary samples or to 'avoid' summaries at all" help="(--summary)">
                <option value="ADD" selected="true">ADD</option>
                <option value="ONLY">ONLY</option>
                <option value="AVOID">AVOID</option>
            </param>
            <param argument="--takeoutroot" type="boolean" truevalue="--takeoutroot" falsevalue="" label="remove counts directly assigned to the root level" help="(--takeoutroot)"/>
            <param argument="--nokollapse" type="boolean" truevalue="--nokollapse" falsevalue="" label="show the cellular organisms taxon" help="(--nokollapse)"/>
            <param argument="--strain" type="boolean" truevalue="--strain" falsevalue="" label="Strain level instead of species as the resolution limit for the robust contamination removal algorithm; use with caution, this is an experimental feature" help="(--strain)"/>
            <param argument="--sequential" type="boolean" truevalue="--sequential" falsevalue="" label="deactivate parallel processing" help="(--sequential)"/>
            <param argument="--debug" type="boolean" truevalue="--debug" falsevalue="" label="increase output verbosity and perform additional checks" help="(--debug)"/>
            <!-- NOTE(review): per the help text this flag makes rcf print its version and exit, so the
                 report outputs declared below are presumably not produced when it is enabled; confirm. -->
            <param argument="--version" type="boolean" truevalue="--version" falsevalue="" label=" show program's version number and exit" help="(--version)"/>
        </section>
    </inputs>
    <!-- OUTPUT FILE, TYPE DEPENDING ON extra PARAMETER -->
    <outputs>
        <data name="html_report" format="html" from_work_dir="output.rcf.html" label="${tool.name} on ${on_string}: html report">
            <filter>output_option['nohtml'] == False</filter>
        </data>
        <!-- Receives everything rcf writes to stdout/stderr (see the redirection at the end of the command) -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
        <data name="data_csv" format="csv" from_work_dir="output.rcf.data.csv" label="${tool.name} on ${on_string}: data.csv">
            <filter>output_option['extra'] == 'CSV'</filter>
        </data>
        <data name="stat_csv" format="csv" from_work_dir="output.rcf.stat.csv" label="${tool.name} on ${on_string}: stat csv">
            <filter>output_option['extra'] == 'CSV' or output_option['extra'] == 'MULTICSV'</filter>
        </data>
        <data name="data_tsv" format="tabular" from_work_dir="output.rcf.data.tsv" label="${tool.name} on ${on_string}: data tsv">
            <filter>output_option['extra'] == 'TSV'</filter>
        </data>
        <data name="stat_tsv" format="tabular" from_work_dir="output.rcf.stat.tsv" label="${tool.name} on ${on_string}: stat tsv">
            <filter>output_option['extra'] == 'TSV'</filter>
        </data>
        <data name="xls_report" format="xlsx" from_work_dir="output.rcf.xlsx" label="${tool.name} on ${on_string}: xlsx report">
            <filter>output_option['extra'] == 'FULL' or output_option['extra'] == 'DYNOMICS'</filter>
        </data>
        <data name="stat_bz" format="bz2" from_work_dir="output.rcf.stat.pkl.bz2" label="${tool.name} on ${on_string}: stat.pkl.bz2">
            <filter>output_option['pickle'] == True</filter>
        </data>
        <data name="data_bz" format="bz2" from_work_dir="output.rcf.data.pkl.bz2" label="${tool.name} on ${on_string}: data.pkl.bz2">
            <filter>output_option['pickle'] == True</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <!-- kraken input and CSV output TEST_1 -->
            <section name="database">
                <!-- conditional/selector names must match the inputs: db_type / db_select -->
                <conditional name="db_type">
                    <param name="db_select" value="cached"/>
                    <param name="cached_db" value="test-db-2022"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="kraken_test/kraken.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="kraken"/>
                </conditional>
            </section>
            <section name="output_option">
                <!-- the CSV outputs asserted below correspond to the "extra" select -->
                <param name="extra" value="CSV"/>
            </section>
            <section name="more_advanced_option">
                <param name="summary" value="AVOID"/>
            </section>
            <output name="data_csv" file="kraken_test/test1_csv.rcf.data.csv" lines_diff="2"/>
            <output name="stat_csv" file="kraken_test/test1_csv.rcf.stat.csv" lines_diff="2"/>
            <output name="html_report" file="kraken_test/test1_csv.rcf.html" lines_diff="2"/>
            <output name="logfile" file="kraken_test/test1_csv.log" lines_diff="7"/>
        </test>
        <test>
            <!-- centrifuge input and full options with imported database TEST_2 -->
            <!-- NOTE(review): with the selector names corrected this test really exercises the history
                 branch; regenerate the expected files if they were produced with the cached database. -->
            <section name="database">
                <conditional name="db_type">
                    <param name="db_select" value="history"/>
                    <param name="history_db" value="test-db/delnodes.dmp,test-db/division.dmp,test-db/gc.prt,test-db/gencode.dmp,test-db/merged.dmp,test-db/names.dmp,test-db/nodes.dmp,test-db/readme.txt"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="centrifuge_test/centrifuge.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="centrifuge"/>
                </conditional>
            </section>
            <section name="output_option">
                <param name="extra" value="MULTICSV"/>
                <param name="pickle" value="false"/>
                <param name="nohtml" value="true"/>
            </section>
            <section name="advanced_option">
                <conditional name="control_select">
                    <param name="controls_type" value="add_neg"/>
                    <param name="controls" value="0"/>
                </conditional>
                <param name="scoring" value="NORMA"/>
                <conditional name="minscore_select">
                    <param name="minscore" value="specify_minscore"/>
                    <param name="minscore_value" value="0"/>
                </conditional>
                <param name="avoidcross" value="true"/>
            </section>
            <section name="more_advanced_option">
                <conditional name="minscore_type">
                    <param name="minscore_select" value="specify_minscore"/>
                    <param name="ctrlminscore" value="0"/>
                </conditional>
                <param name="summary" value="AVOID"/>
            </section>
            <output name="stat_csv" file="centrifuge_test/test2_multicsv.rcf.stat.csv" lines_diff="2"/>
            <output name="logfile" file="centrifuge_test/test2_multicsv.log" lines_diff="20"/>
        </test>
        <test>
            <!-- kraken input, cached DB, TSV extra output without HTML, LOGLENGTH scoring, strain level TEST_3 -->
            <section name="database">
                <conditional name="db_type">
                    <param name="db_select" value="cached"/>
                    <param name="cached_db" value="test-db-2022"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="kraken_test/kraken.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="kraken"/>
                </conditional>
            </section>
            <section name="output_option">
                <param name="extra" value="TSV"/>
                <param name="pickle" value="false"/>
                <param name="nohtml" value="true"/>
            </section>
            <section name="advanced_option">
                <param name="scoring" value="LOGLENGTH"/>
            </section>
            <section name="more_advanced_option">
                <param name="summary" value="ONLY"/>
                <param name="strain" value="true"/>
            </section>
            <output name="data_tsv" file="kraken_test/test3_rcf.data.tsv" lines_diff="2"/>
            <output name="stat_tsv" file="kraken_test/test3_rcf.stat.tsv" lines_diff="2"/>
            <output name="logfile" file="kraken_test/test3_tsv.log" lines_diff="20"/>
        </test>
    </tests>
    <help>
<![CDATA[
=-= /home/pierre/anaconda3/envs/rcf/bin/rcf =-= v1.8.1 - Mar 2022 =-= by Jose Manuel Martí =-=

usage: rcf [-h] [-V] [-n PATH] [--format GENERIC_FORMAT]
           (-f FILE | -g FILE | -l FILE | -r FILE | -k FILE) [-o FILE]
           [-e OUTPUT_TYPE] [-p] [--nohtml] [-a | -c CONTROLS_NUMBER]
           [-s SCORING] [-y NUMBER] [-m INT] [-x TAXID] [-i TAXID] [-z NUMBER]
           [-w INT] [-u SUMMARY_BEHAVIOR] [-t] [--nokollapse] [-d] [--strain]
           [--sequential]

Robust comparative analysis and contamination removal for metagenomics

options:
  -h, --help            show this help message and exit
  -V, --version         show program's version number and exit

input:
  Define Recentrifuge input files and formats

  -n PATH, --nodespath PATH
                        path for the nodes information files (nodes.dmp and
                        names.dmp from NCBI)
  --format GENERIC_FORMAT
                        format of the output files from a generic classifier
                        included with the option -g; It is a string like
                        "TYP:csv,TID:1,LEN:3,SCO:6,UNC:0" where valid file
                        TYPes are csv/tsv/ssv, and the rest of fields indicate
                        the number of column used (starting in 1) for the
                        TaxIDs assigned, the LENgth of the read, the SCOre
                        given to the assignment, and the taxid code used for
                        UNClassified reads
  -f FILE, --file FILE  Centrifuge output files; if a single directory is
                        entered, every .out file inside will be taken as a
                        different sample; multiple -f is available to include
                        several Centrifuge samples
  -g FILE, --generic FILE
                        output file from a generic classifier; it requires the
                        flag --format (see such option for details); multiple
                        -g is available to include several generic samples
  -l FILE, --lmat FILE  LMAT output dir or file prefix; if just "." is entered,
                        every subdirectory under the current directory will be
                        taken as a sample and scanned looking for LMAT output
                        files; multiple -l is available to include several
                        samples
  -r FILE, --clark FILE
                        CLARK full-mode output files; if a single directory is
                        entered, every .csv file inside will be taken as a
                        different sample; multiple -r is available to include
                        several CLARK, CLARK-l, and CLARK-S full-mode samples
  -k FILE, --kraken FILE
                        Kraken output files; if a single directory is entered,
                        every .krk file inside will be taken as a different
                        sample; multiple -k is available to include several
                        Kraken (version 1 or 2) samples

output:
  Related to the Recentrifuge output files

  -o FILE, --outprefix FILE
                        output prefix; if not given, it will be inferred from
                        input files; an HTML filename is still accepted for
                        backwards compatibility with legacy --outhtml option
  -e OUTPUT_TYPE, --extra OUTPUT_TYPE
                        type of extra output to be generated, and can be one of
                        ['FULL', 'CSV', 'MULTICSV', 'TSV', 'DYNOMICS']
  -p, --pickle          pickle (serialize) statistics and data results in
                        pandas DataFrames (format affected by selection of
                        --extra)
  --nohtml              suppress saving the HTML output file

tuning:
  Coarse tuning of algorithm parameters

  -a, --avoidcross      avoid cross analysis
  -c CONTROLS_NUMBER, --controls CONTROLS_NUMBER
                        this number of first samples will be treated as
                        negative controls; default is no controls
  -s SCORING, --scoring SCORING
                        type of scoring to be applied, and can be one of
                        ['SHEL', 'LENGTH', 'LOGLENGTH', 'NORMA', 'LMAT',
                        'CLARK_C', 'CLARK_G', 'KRAKEN', 'GENERIC']
  -y NUMBER, --minscore NUMBER
                        minimum score/confidence of the classification of a
                        read to pass the quality filter; all pass by default
  -m INT, --mintaxa INT
                        minimum taxa to avoid collapsing one level into the
                        parent (if not specified a value will be automatically
                        assigned)
  -x TAXID, --exclude TAXID
                        NCBI taxid code to exclude a taxon and all underneath
                        (multiple -x is available to exclude several taxid)
  -i TAXID, --include TAXID
                        NCBI taxid code to include a taxon and all underneath
                        (multiple -i is available to include several taxid); by
                        default, all the taxa are considered for inclusion

fine tuning:
  Fine tuning of algorithm parameters

  -z NUMBER, --ctrlminscore NUMBER
                        minimum score/confidence of the classification of a
                        read in control samples to pass the quality filter; it
                        defaults to "minscore"
  -w INT, --ctrlmintaxa INT
                        minimum taxa to avoid collapsing one level into the
                        parent (if not specified a value will be automatically
                        assigned)
  -u SUMMARY_BEHAVIOR, --summary SUMMARY_BEHAVIOR
                        choice for summary behaviour, and can be one of ['ADD',
                        'ONLY', 'AVOID']
  -t, --takeoutroot     remove counts directly assigned to the "root" level
  --nokollapse          show the "cellular organisms" taxon

advanced:
  Advanced modes of running

  -d, --debug           increase output verbosity and perform additional checks
  --strain              set strain level instead of species as the resolution
                        limit for the robust contamination removal algorithm;
                        use with caution, this is an experimental feature
  --sequential          deactivate parallel processing

rcf - Release 1.8.1 - Mar 2022

Copyright (C) 2017–2022, Jose Manuel Martí Martínez

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
]]>
    </help>
    <expand macro="citations"/>
</tool>