Mercurial > repos > pimarin > recentrifuge
diff recentrifuge.xml @ 0:e5474449c35d draft
"planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/recentrifuge commit 000b196a8781301582ee706ab287f65f27478a12-dirty"
author | pimarin |
---|---|
date | Wed, 06 Apr 2022 10:31:22 +0000 |
parents | |
children | b135c5908e8c |
line wrap: on
line diff
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper around the `rcf` command (Recentrifuge).

    The tool takes one tabular classifier result plus a taxonomy directory
    (from a data table entry or from history datasets), runs `rcf` with the
    output prefix "output", and collects the result files named
    output.rcf.* from the working directory.

    The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the
    xrefs / requirements / version_command / citations macros are not
    defined here; presumably they come from the imported macro.xml.
-->
<tool id="recentrifuge" name="Recentrifuge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Robust comparative analysis and contamination removal for metagenomics</description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Taxonomy directory handed to rcf through -n: either the path column
        ## of the selected data table entry, or a local directory filled with
        ## symlinks to the history datasets (named after their identifiers).
        #if $database.db_type.db_select == "cached"
            #set $rcf_db = $database.db_type.cached_db.fields.path
        #else if $database.db_type.db_select == "history"
            mkdir rcf_db &&
            #for $i in $database.db_type.history_db
                ln -s '$i' 'rcf_db/$i.element_identifier' &&
            #end for
            #set $rcf_db = "rcf_db"
        #end if
        rcf
            -n '$rcf_db'
            ## input type: one flag per supported classifier
            #if $input_option.file_type.filetype == "centrifuge"
                -f '$input_option.input_file'
            #else if $input_option.file_type.filetype == "lmat"
                -l '$input_option.input_file'
            #else if $input_option.file_type.filetype == "clark"
                -r '$input_option.input_file'
            #else if $input_option.file_type.filetype == "kraken"
                -k '$input_option.input_file'
            #else if $input_option.file_type.filetype == "generic"
                -g '$input_option.input_file'
                --format '$input_option.file_type.format'
            #end if
            ## output options: the fixed prefix "output" is what the
            ## from_work_dir attributes of the outputs rely on
            -e '$output_option.extra'
            -o output
            $output_option.pickle
            $output_option.nohtml
            ## coarse tuning: each option is only emitted when the user opted in
            #if $advanced_option.control_select.controls_type == "add_neg"
                --controls '$advanced_option.control_select.controls'
            #end if
            #if $advanced_option.scoring != "DEFAULT"
                --scoring '$advanced_option.scoring'
            #end if
            #if $advanced_option.minscore_select.minscore == "specify_minscore"
                --minscore '$advanced_option.minscore_select.minscore_value'
            #end if
            #if $advanced_option.mintaxa_type.mintaxa_select == "specify_mintaxa"
                --mintaxa '$advanced_option.mintaxa_type.mintaxa'
            #end if
            #if $advanced_option.exclude_taxa_type.exclude_taxa_select == "yes_exclude"
                --exclude '$advanced_option.exclude_taxa_type.exclude_taxa_name'
            #end if
            #if $advanced_option.include_taxa_type.include_taxa_select == "yes_include"
                --include '$advanced_option.include_taxa_type.include_taxa_name'
            #end if
            $advanced_option.avoidcross
            ## fine tuning
            #if $more_advanced_option.minscore_type.minscore_select == "specify_minscore"
                --ctrlminscore '$more_advanced_option.minscore_type.ctrlminscore'
            #end if
            #if $more_advanced_option.ctrlmintaxa_type.ctrlmintaxa_select == "specify_ctrlmintaxa"
                --ctrlmintaxa '$more_advanced_option.ctrlmintaxa_type.ctrlmintaxa'
            #end if
            --summary '$more_advanced_option.summary'
            $more_advanced_option.takeoutroot
            $more_advanced_option.nokollapse
            $more_advanced_option.strain
            $more_advanced_option.sequential
            $more_advanced_option.debug
            $more_advanced_option.version
            ## stdout and stderr both go to the log dataset
            > '$logfile' 2>&1
    ]]></command>
    <inputs>
        <!-- INPUT FILES -->
        <section name="input_option" title="Input options" expanded="true">
            <param name="input_file" type="data" format="tabular" label="Select taxonomy file tabular formatted"/>
            <conditional name="file_type">
                <param name="filetype" type="select" label="Type of input file (centrifuge, CLARK, Generic, kraken, LMAT)" help="(-f, -r, -g, -k, -l)">
                    <option value="centrifuge">Centrifuge</option>
                    <option value="clark">CLARK</option>
                    <option value="generic">Generic</option>
                    <option value="lmat">LMAT</option>
                    <option value="kraken">Kraken</option>
                </param>
                <when value="centrifuge"/>
                <when value="lmat"/>
                <when value="clark"/>
                <when value="kraken"/>
                <when value="generic">
                    <param argument="--format" type="text" label="Format of the output files from a generic classifier" help="string like 'TYP:csv,TID:1,LEN:3,SCO:6,UNC:0' where valid file TYPes are csv/tsv/ssv, and the rest of fields indicate the number of column used (starting in 1) for the TaxIDs assigned, the LENgth of the read, the SCOre given to the assignment (--format)">
                        <!-- The embedded rcf help states that -g requires the format option, so an empty value is rejected up front. -->
                        <validator type="empty_field" message="A format string is required for generic classifier input"/>
                    </param>
                </when>
            </conditional>
        </section>
        <!-- taxa databases -->
        <section name="database" title="Database type" expanded="true">
            <conditional name="db_type">
                <param name="db_select" type="select" label="Cached database with clade-specific marker genes">
                    <option value="cached" selected="true">Locally installed</option>
                    <option value="history">From history</option>
                </param>
                <when value="cached">
                    <param name="cached_db" type="select" label="Cached database with taxa ID">
                        <options from_data_table="rcf_database">
                            <validator message="No recentrifuge database is available" type="no_options"/>
                        </options>
                    </param>
                </when>
                <when value="history">
                    <!-- Each selected dataset is symlinked into rcf_db/ under its element identifier. -->
                    <param name="history_db" type="data" multiple="true" format="txt" label="Database from history"/>
                </when>
            </conditional>
        </section>
        <!-- output name -->
        <section name="output_option" title="Output options">
            <param argument="--extra" type="select" label="Type of extra output to be generated (default on CSV)" help="(--extra)">
                <option value="CSV" selected="true">CSV</option>
                <option value="DYNOMICS">DYNOMICS</option>
                <option value="FULL">FULL</option>
                <option value="MULTICSV">MULTICSV</option>
                <option value="TSV">TSV</option>
            </param>
            <param argument="--pickle" type="boolean" truevalue="--pickle" falsevalue="" label="Serialize statistics and data results in pandas DataFrames" help="(--pickle)"/>
            <param argument="--nohtml" type="boolean" truevalue="--nohtml" falsevalue="" label="Suppress saving the HTML output file" help="(--nohtml)"/>
        </section>
        <!-- ADVANCED OPTIONS -->
        <section name="advanced_option" title="Coarse tuning of algorithm parameters">
            <conditional name="control_select">
                <param name="controls_type" type="select" label="Number of first samples will be treated as negative controls (default is 0)" help="(--controls)">
                    <option value="default">No control</option>
                    <option value="add_neg">Add negative controls </option>
                </param>
                <when value="default"/>
                <when value="add_neg">
                    <param name="controls" type="integer" min="0" value="0" label="Number of samples"/>
                </when>
            </conditional>
            <!-- DEFAULT is a wrapper-only value: the command template omits the scoring option when it is selected. -->
            <param name="scoring" type="select" label="Type of scoring to be applied" help="(--scoring)">
                <option value="DEFAULT" selected="true">Default scoring</option>
                <option value="SHEL">SHEL</option>
                <option value="LENGTH">LENGTH</option>
                <option value="LOGLENGTH">LOGLENGTH</option>
                <option value="NORMA">NORMA</option>
                <option value="LMAT">LMAT</option>
                <option value="CLARK_C">CLARK_C</option>
                <option value="CLARK_G">CLARK_G</option>
                <option value="KRAKEN">KRAKEN</option>
                <option value="GENERIC">GENERIC</option>
            </param>
            <conditional name="minscore_select">
                <param name="minscore" type="select" label="minimum score/confidence of the classification of a read to pass the quality filter; all pass by default" help="(--minscore)">
                    <option value="default" selected="true">Default all pass</option>
                    <option value="specify_minscore">Specify value</option>
                </param>
                <when value="default"/>
                <when value="specify_minscore">
                    <param name="minscore_value" type="integer" min="0" value="0" label="minimum score/confidence value"/>
                </when>
            </conditional>
            <conditional name="mintaxa_type">
                <param name="mintaxa_select" type="select" label="Minimum taxa to avoid collapsing one level into the parent (if not specified a value will be automatically assigned)" help="(--mintaxa)">
                    <option value="default" selected="true">Automatically assigned</option>
                    <option value="specify_mintaxa">Choose value</option>
                </param>
                <when value="default"/>
                <when value="specify_mintaxa">
                    <param name="mintaxa" type="integer" min="0" value="0" label="Minimum taxa number"/>
                </when>
            </conditional>
            <conditional name="exclude_taxa_type">
                <param name="exclude_taxa_select" type="select" label="NCBI taxid code to exclude a taxon and all underneath (default, no exclude)" help="(--exclude)">
                    <option value="no_exclude">No exclusion</option>
                    <option value="yes_exclude">Specify excluded taxa</option>
                </param>
                <when value="yes_exclude">
                    <param name="exclude_taxa_name" type="text" label="NCBI taxid code to exclude"/>
                </when>
                <when value="no_exclude"/>
            </conditional>
            <conditional name="include_taxa_type">
                <param name="include_taxa_select" type="select" label="NCBI taxid code to include a taxon and all underneath" help="(--include)">
                    <option value="no_include">Default no taxa include</option>
                    <option value="yes_include">Specify included taxa</option>
                </param>
                <when value="yes_include">
                    <param name="include_taxa_name" type="text" label="NCBI taxid code to include"/>
                </when>
                <when value="no_include"/>
            </conditional>
            <param argument="--avoidcross" type="boolean" truevalue="--avoidcross" falsevalue="" label="Avoid cross analysis" help="(--avoidcross)"/>
        </section>
        <!-- Detailed more fine parameters -->
        <section name="more_advanced_option" title=" Fine tuning of algorithm parameters">
            <conditional name="minscore_type">
                <param name="minscore_select" type="select" label="minimum score/confidence of the classification of a read in control samples to pass the quality filter; it defaults to minscore" help="(--ctrlminscore)">
                    <option value="default_minscore">Default minscore</option>
                    <option value="specify_minscore">Specify minscore</option>
                </param>
                <when value="default_minscore"/>
                <when value="specify_minscore">
                    <param name="ctrlminscore" type="integer" value="0" label="minimum score/confidence"/>
                </when>
            </conditional>
            <conditional name="ctrlmintaxa_type">
                <param name="ctrlmintaxa_select" type="select" label="Minimum taxa to avoid collapsing one level into the parent" help="(--ctrlmintaxa)">
                    <option value="default_ctrlmintaxa">Default value</option>
                    <option value="specify_ctrlmintaxa">Specify minimum taxa number</option>
                </param>
                <when value="default_ctrlmintaxa"/>
                <when value="specify_ctrlmintaxa">
                    <param name="ctrlmintaxa" type="integer" value="0" label="Minimum taxa number"/>
                </when>
            </conditional>
            <param name="summary" type="select" label="select to 'add' summary samples to other samples, or to 'only' show summary samples or to 'avoid' summaries at all" help="(--summary)">
                <option value="ADD" selected="true">ADD</option>
                <option value="ONLY">ONLY</option>
                <option value="AVOID">AVOID</option>
            </param>
            <param argument="--takeoutroot" type="boolean" truevalue="--takeoutroot" falsevalue="" label="remove counts directly assigned to the root level" help="(--takeoutroot)"/>
            <param argument="--nokollapse" type="boolean" truevalue="--nokollapse" falsevalue="" label="show the cellular organisms taxon" help="(--nokollapse)"/>
            <param argument="--strain" type="boolean" truevalue="--strain" falsevalue="" label="Strain level instead of species as the resolution limit for the robust contamination removal algorithm; use with caution, this is an experimental feature" help="(--strain)"/>
            <param argument="--sequential" type="boolean" truevalue="--sequential" falsevalue="" label="deactivate parallel processing" help="(--sequential)"/>
            <param argument="--debug" type="boolean" truevalue="--debug" falsevalue="" label="increase output verbosity and perform additional checks" help="(--debug)"/>
            <!-- NOTE(review): per the embedded help this flag makes rcf show its version and exit, so the declared result files are presumably not produced when it is enabled; confirm whether it should stay exposed. -->
            <param argument="--version" type="boolean" truevalue="--version" falsevalue="" label=" show program's version number and exit" help="(--version)"/>
        </section>
    </inputs>
    <!-- OUTPUT FILE, TYPE DEPENDING ON extra PARAMETER -->
    <outputs>
        <data name="html_report" format="html" from_work_dir="output.rcf.html" label="${tool.name} on ${on_string}: html report">
            <filter> output_option['nohtml'] == False</filter>
        </data>
        <!-- The log is always produced: the command redirects stdout and stderr into it. -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"/>
        <data name="data_csv" format="csv" from_work_dir="output.rcf.data.csv" label="${tool.name} on ${on_string}: data.csv">
            <filter> output_option['extra'] == 'CSV' </filter>
        </data>
        <data name="stat_csv" format="csv" from_work_dir="output.rcf.stat.csv" label="${tool.name} on ${on_string}: stat csv">
            <filter> output_option['extra'] == 'CSV' or output_option['extra'] == 'MULTICSV' </filter>
        </data>
        <data name="data_tsv" format="tabular" from_work_dir="output.rcf.data.tsv" label="${tool.name} on ${on_string}: data tsv">
            <filter> output_option['extra'] == 'TSV' </filter>
        </data>
        <data name="stat_tsv" format="tabular" from_work_dir="output.rcf.stat.tsv" label="${tool.name} on ${on_string}: stat tsv">
            <filter> output_option['extra'] == 'TSV' </filter>
        </data>
        <data name="xls_report" format="xlsx" from_work_dir="output.rcf.xlsx" label="${tool.name} on ${on_string}: xlsx report">
            <filter> output_option['extra'] == 'FULL' or output_option['extra'] == 'DYNOMICS'</filter>
        </data>
        <data name="stat_bz" format="bz2" from_work_dir="output.rcf.stat.pkl.bz2" label="${tool.name} on ${on_string}: stat.pkl.bz2">
            <filter> output_option['pickle'] == True </filter>
        </data>
        <data name="data_bz" format="bz2" from_work_dir="output.rcf.data.pkl.bz2" label="${tool.name} on ${on_string}: data.pkl.bz2">
            <filter> output_option['pickle'] == True </filter>
        </data>
    </outputs>
    <tests>
        <!-- TEST_1: kraken input, cached database, CSV output -->
        <test>
            <section name="database">
                <conditional name="db_type">
                    <param name="db_select" value="cached"/>
                    <param name="cached_db" value="test-db-2022"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="kraken_test/kraken.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="kraken"/>
                </conditional>
            </section>
            <section name="output_option">
                <param name="extra" value="CSV"/>
            </section>
            <section name="more_advanced_option">
                <param name="summary" value="AVOID"/>
            </section>
            <output name="data_csv" file="kraken_test/test1_csv.rcf.data.csv" lines_diff="2"/>
            <output name="stat_csv" file="kraken_test/test1_csv.rcf.stat.csv" lines_diff="2"/>
            <output name="html_report" file="kraken_test/test1_csv.rcf.html" lines_diff="2"/>
            <output name="logfile" file="kraken_test/test1_csv.log" lines_diff="7"/>
        </test>
        <!-- TEST_2: centrifuge input, database imported from history, MULTICSV output with extra options -->
        <test>
            <section name="database">
                <conditional name="db_type">
                    <param name="db_select" value="history"/>
                    <param name="history_db" value="test-db/delnodes.dmp,test-db/division.dmp,test-db/gc.prt,test-db/gencode.dmp,test-db/merged.dmp,test-db/names.dmp,test-db/nodes.dmp,test-db/readme.txt"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="centrifuge_test/centrifuge.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="centrifuge"/>
                </conditional>
            </section>
            <section name="output_option">
                <param name="extra" value="MULTICSV"/>
                <param name="pickle" value="false"/>
                <param name="nohtml" value="true"/>
            </section>
            <section name="advanced_option">
                <conditional name="control_select">
                    <param name="controls_type" value="add_neg"/>
                    <param name="controls" value="0"/>
                </conditional>
                <param name="scoring" value="NORMA"/>
                <conditional name="minscore_select">
                    <param name="minscore" value="specify_minscore"/>
                    <param name="minscore_value" value="0"/>
                </conditional>
                <param name="avoidcross" value="true"/>
            </section>
            <section name="more_advanced_option">
                <conditional name="minscore_type">
                    <param name="minscore_select" value="specify_minscore"/>
                    <param name="ctrlminscore" value="0"/>
                </conditional>
                <param name="summary" value="AVOID"/>
            </section>
            <output name="stat_csv" file="centrifuge_test/test2_multicsv.rcf.stat.csv" lines_diff="2"/>
            <output name="logfile" file="centrifuge_test/test2_multicsv.log" lines_diff="20"/>
        </test>
        <!-- TEST_3: kraken input, cached database, TSV output, no HTML, extra scoring/summary/strain options -->
        <test>
            <section name="database">
                <conditional name="db_type">
                    <param name="db_select" value="cached"/>
                    <param name="cached_db" value="test-db-2022"/>
                </conditional>
            </section>
            <section name="input_option">
                <param name="input_file" value="kraken_test/kraken.out"/>
                <conditional name="file_type">
                    <param name="filetype" value="kraken"/>
                </conditional>
            </section>
            <section name="output_option">
                <param name="extra" value="TSV"/>
                <param name="pickle" value="false"/>
                <param name="nohtml" value="true"/>
            </section>
            <section name="advanced_option">
                <param name="scoring" value="LOGLENGTH"/>
            </section>
            <section name="more_advanced_option">
                <param name="summary" value="ONLY"/>
                <param name="strain" value="true"/>
            </section>
            <output name="data_tsv" file="kraken_test/test3_rcf.data.tsv" lines_diff="2"/>
            <output name="stat_tsv" file="kraken_test/test3_rcf.stat.tsv" lines_diff="2"/>
            <output name="logfile" file="kraken_test/test3_tsv.log" lines_diff="20"/>
        </test>
    </tests>
    <help><![CDATA[
::

    =-= /home/pierre/anaconda3/envs/rcf/bin/rcf =-= v1.8.1 - Mar 2022 =-= by Jose Manuel Martí =-=

    usage: rcf [-h] [-V] [-n PATH] [--format GENERIC_FORMAT]
               (-f FILE | -g FILE | -l FILE | -r FILE | -k FILE) [-o FILE]
               [-e OUTPUT_TYPE] [-p] [--nohtml] [-a | -c CONTROLS_NUMBER]
               [-s SCORING] [-y NUMBER] [-m INT] [-x TAXID] [-i TAXID] [-z NUMBER]
               [-w INT] [-u SUMMARY_BEHAVIOR] [-t] [--nokollapse] [-d] [--strain]
               [--sequential]

    Robust comparative analysis and contamination removal for metagenomics

    options:
      -h, --help            show this help message and exit
      -V, --version         show program's version number and exit

    input:
      Define Recentrifuge input files and formats

      -n PATH, --nodespath PATH
                            path for the nodes information files (nodes.dmp and
                            names.dmp from NCBI)
      --format GENERIC_FORMAT
                            format of the output files from a generic classifier
                            included with the option -g; It is a string like
                            "TYP:csv,TID:1,LEN:3,SCO:6,UNC:0" where valid file
                            TYPes are csv/tsv/ssv, and the rest of fields indicate
                            the number of column used (starting in 1) for the
                            TaxIDs assigned, the LENgth of the read, the SCOre
                            given to the assignment, and the taxid code used for
                            UNClassified reads
      -f FILE, --file FILE  Centrifuge output files; if a single directory is
                            entered, every .out file inside will be taken as a
                            different sample; multiple -f is available to include
                            several Centrifuge samples
      -g FILE, --generic FILE
                            output file from a generic classifier; it requires the
                            flag --format (see such option for details); multiple
                            -g is available to include several generic samples
      -l FILE, --lmat FILE  LMAT output dir or file prefix; if just "." is
                            entered, every subdirectory under the current
                            directory will be taken as a sample and scanned
                            looking for LMAT output files; multiple -l is
                            available to include several samples
      -r FILE, --clark FILE
                            CLARK full-mode output files; if a single directory is
                            entered, every .csv file inside will be taken as a
                            different sample; multiple -r is available to include
                            several CLARK, CLARK-l, and CLARK-S full-mode samples
      -k FILE, --kraken FILE
                            Kraken output files; if a single directory is entered,
                            every .krk file inside will be taken as a different
                            sample; multiple -k is available to include several
                            Kraken (version 1 or 2) samples

    output:
      Related to the Recentrifuge output files

      -o FILE, --outprefix FILE
                            output prefix; if not given, it will be inferred from
                            input files; an HTML filename is still accepted for
                            backwards compatibility with legacy --outhtml option
      -e OUTPUT_TYPE, --extra OUTPUT_TYPE
                            type of extra output to be generated, and can be one
                            of ['FULL', 'CSV', 'MULTICSV', 'TSV', 'DYNOMICS']
      -p, --pickle          pickle (serialize) statistics and data results in
                            pandas DataFrames (format affected by selection of
                            --extra)
      --nohtml              suppress saving the HTML output file

    tuning:
      Coarse tuning of algorithm parameters

      -a, --avoidcross      avoid cross analysis
      -c CONTROLS_NUMBER, --controls CONTROLS_NUMBER
                            this number of first samples will be treated as
                            negative controls; default is no controls
      -s SCORING, --scoring SCORING
                            type of scoring to be applied, and can be one of
                            ['SHEL', 'LENGTH', 'LOGLENGTH', 'NORMA', 'LMAT',
                            'CLARK_C', 'CLARK_G', 'KRAKEN', 'GENERIC']
      -y NUMBER, --minscore NUMBER
                            minimum score/confidence of the classification of a
                            read to pass the quality filter; all pass by default
      -m INT, --mintaxa INT
                            minimum taxa to avoid collapsing one level into the
                            parent (if not specified a value will be automatically
                            assigned)
      -x TAXID, --exclude TAXID
                            NCBI taxid code to exclude a taxon and all underneath
                            (multiple -x is available to exclude several taxid)
      -i TAXID, --include TAXID
                            NCBI taxid code to include a taxon and all underneath
                            (multiple -i is available to include several taxid);
                            by default, all the taxa are considered for inclusion

    fine tuning:
      Fine tuning of algorithm parameters

      -z NUMBER, --ctrlminscore NUMBER
                            minimum score/confidence of the classification of a
                            read in control samples to pass the quality filter; it
                            defaults to "minscore"
      -w INT, --ctrlmintaxa INT
                            minimum taxa to avoid collapsing one level into the
                            parent (if not specified a value will be automatically
                            assigned)
      -u SUMMARY_BEHAVIOR, --summary SUMMARY_BEHAVIOR
                            choice for summary behaviour, and can be one of
                            ['ADD', 'ONLY', 'AVOID']
      -t, --takeoutroot     remove counts directly assigned to the "root" level
      --nokollapse          show the "cellular organisms" taxon

    advanced:
      Advanced modes of running

      -d, --debug           increase output verbosity and perform additional
                            checks
      --strain              set strain level instead of species as the resolution
                            limit for the robust contamination removal algorithm;
                            use with caution, this is an experimental feature
      --sequential          deactivate parallel processing

    rcf - Release 1.8.1 - Mar 2022

    Copyright (C) 2017–2022, Jose Manuel Martí Martínez

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.
    ]]></help>
    <expand macro="citations"/>
</tool>