Mercurial > repos > iuc > metaphlan
comparison metaphlan.xml @ 0:ec88e50db7c1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
| author | iuc |
|---|---|
| date | Mon, 19 Apr 2021 20:52:24 +0000 |
| parents | |
| children | b0aed9401632 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ec88e50db7c1 |
|---|---|
| 1 <tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>to profile the composition of microbial communities</description> <!-- short tool summary --> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> <!-- presumably supplies the @...@ tokens and the macros expanded in this file; macros.xml is not shown here, so confirm there --> | |
| 5 </macros> | |
| 6 <expand macro="edam_ontology"/> <!-- expands the macro named "edam_ontology" --> | |
| 7 <expand macro="requirements"/> <!-- expands the macro named "requirements" --> | |
| 8 <version_command>metaphlan -v</version_command> <!-- command used to report the MetaPhlAn version --> | |
| 9 <command detect_errors="aggressive"><![CDATA[ | |
| ## Step 1 (raw reads only): decompress gz/bz2 inputs into the working directory, build file_path | |
| ## (comma-separated when several files are given) and derive ext, the value given to the input_type option. | |
| 10 #if $inputs.in.selector == "raw" | |
| 11 #if $inputs.in.raw_in.selector == "single" | |
| 12 #set full_ext=$inputs.in.raw_in.in.datatype.file_ext | |
| 13 #if $full_ext.endswith("gz") | |
| 14 #set $file_path="in" | |
| 15 zcat '$inputs.in.raw_in.in' > '$file_path' | |
| 16 && | |
| 17 #else if $full_ext.endswith("bz2") | |
| 18 #set $file_path="in" | |
| 19 bzcat '$inputs.in.raw_in.in' > '$file_path' | |
| 20 && | |
| 21 #else | |
| 22 #set $file_path=$inputs.in.raw_in.in | |
| 23 #end if | |
| 24 #else if $inputs.in.raw_in.selector == "multiple" | |
| 25 #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext | |
| 26 #set file_path="" | |
| 27 #set sep="" | |
| 28 #for $i, $f in enumerate($inputs.in.raw_in.in) | |
| ## Every file must share the datatype of the first one; otherwise the job is aborted. | |
| 29 #if $f.datatype.file_ext != $full_ext | |
| 30 echo "Different datatypes for input files" | |
| 31 && | |
| ## The trailing separator keeps "exit 1" a complete command once the template lines are joined with what follows. | |
| 32 exit 1 && | |
| 33 #end if | |
| 34 #if $full_ext.endswith("gz") | |
| 35 #set fp="input_%s" % ($i) | |
| 36 zcat '$f' > '$fp' | |
| 37 && | |
| 38 #else if $full_ext.endswith("bz2") | |
| 39 #set fp="input_%s" % ($i) | |
| 40 bzcat '$f' > '$fp' | |
| 41 && | |
| 42 #else | |
| 43 #set fp=$f | |
| 44 #end if | |
| 45 #set $file_path+="%s%s" % ($sep, $fp) | |
| 46 #set $sep="," | |
| 47 #end for | |
| 48 #else if $inputs.in.raw_in.selector == "paired" | |
| 49 #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext | |
| 50 #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext | |
| 51 echo "Different datatypes for input paired-end files" | |
| 52 && | |
| ## Same reason as above: terminate the exit command before the next command is appended. | |
| 53 exit 1 && | |
| 54 #end if | |
| 55 #if $full_ext.endswith("gz") | |
| 56 zcat '$inputs.in.raw_in.in_f' > 'in_f' | |
| 57 && | |
| 58 zcat '$inputs.in.raw_in.in_r' > 'in_r' | |
| 59 && | |
| 60 #set file_path="in_f,in_r" | |
| 61 #else if $full_ext.endswith("bz2") | |
| 62 bzcat '$inputs.in.raw_in.in_f' > 'in_f' | |
| 63 && | |
| 64 bzcat '$inputs.in.raw_in.in_r' > 'in_r' | |
| 65 && | |
| 66 #set file_path="in_f,in_r" | |
| 67 #else | |
| 68 #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r) | |
| 69 #end if | |
| 70 #end if | |
| 71 | |
| ## Reduce the Galaxy datatype extension to fastq or fasta; any other extension is passed through unchanged. | |
| 72 #if $full_ext.startswith("fastq") | |
| 73 #set ext='fastq' | |
| 74 #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2")) | |
| 75 #set ext='fasta' | |
| 76 #else | |
| 77 #set ext=$full_ext | |
| 78 #end if | |
| 79 #end if | |
| 80 | |
| ## Step 2 (database from history): index the FASTA with bowtie2-build and convert the JSON metadata to a pickle in ref_db. | |
| 81 #if $inputs.db.db_selector == "history" | |
| 82 mkdir 'ref_db' | |
| 83 && | |
| 84 bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db' | |
| 85 && | |
| 86 python '$__tool_directory__/customizemetadata.py' | |
| 87 transform_json_to_pkl | |
| 88 --json '$inputs.db.mpa_pkl' | |
| 89 --pkl 'ref_db/custom_db.pkl' | |
| 90 && | |
| 91 #end if | |
| 92 | |
| ## Step 3: the MetaPhlAn call itself. | |
| 93 metaphlan | |
| 94 #if $inputs.in.selector == "raw" | |
| 95 '$file_path' | |
| 96 --input_type '$ext' | |
| 97 --read_min_len $inputs.in.read_min_len | |
| 98 --bt2_ps '$inputs.in.mapping.bt2_ps' | |
| 99 --min_mapq_val $inputs.in.mapping.min_mapq_val | |
| 100 #else | |
| 101 '$inputs.in.in' | |
| 102 --input_type '$inputs.in.selector' | |
| 103 #end if | |
| 104 #if $inputs.db.db_selector == "cached" | |
| 105 --bowtie2db '$inputs.db.cached_db.fields.path' | |
| 106 --index '$inputs.db.cached_db.fields.dbkey' | |
| 107 #else | |
| 108 --bowtie2db 'ref_db/' | |
| 109 --index 'custom_db' | |
| 110 #end if | |
| 111 -t '$analysis.analysis_type.t' | |
| 112 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats" | |
| 113 --tax_lev '$analysis.analysis_type.tax_lev' | |
| 114 #else if $analysis.analysis_type.t == "clade_specific_strain_tracker" | |
| 115 --clade '$analysis.analysis_type.clade' | |
| 116 #if str($analysis.analysis_type.min_ab) != '' | |
| 117 --min_ab $analysis.analysis_type.min_ab | |
| 118 #end if | |
| 119 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != '' | |
| ## Single dollar sign: a doubled one would put a stray dollar character in front of the number on the command line. | |
| 120 --nreads $analysis.analysis_type.nreads | |
| 121 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != '' | |
| 122 --pres_th $analysis.analysis_type.pres_th | |
| 123 #end if | |
| 124 --min_cu_len $analysis.min_cu_len | |
| 125 #if str($analysis.min_alignment_len) != '' | |
| 126 --min_alignment_len $analysis.min_alignment_len | |
| 127 #end if | |
| 128 #if 'add_viruses' in $analysis.organism_profiling | |
| 129 --add_viruses | |
| 130 #end if | |
| 131 #if 'ignore_eukaryotes' in $analysis.organism_profiling | |
| 132 --ignore_eukaryotes | |
| 133 #end if | |
| 134 #if 'ignore_bacteria' in $analysis.organism_profiling | |
| 135 --ignore_bacteria | |
| 136 #end if | |
| 137 #if 'ignore_archaea' in $analysis.organism_profiling | |
| 138 --ignore_archaea | |
| 139 #end if | |
| ## Forward the statistical approach chosen in the "stat" select; without this the selection has no effect. | |
| --stat '$analysis.stat' | |
| 140 --stat_q $analysis.stat_q | |
| 141 --perc_nonzero $analysis.perc_nonzero | |
| 142 #if $analysis.ignore_markers | |
| 143 --ignore_markers '$analysis.ignore_markers' | |
| 144 #end if | |
| 145 $analysis.avoid_disqm | |
| 146 --sample_id_key '$out.sample_id_key' | |
| 147 --sample_id '$out.sample_id' | |
| 148 $out.use_group_representative | |
| 149 $out.legacy_output | |
| 150 $out.CAMI_format_output | |
| 151 $out.unknown_estimation | |
| 152 -o '$output_file' | |
| 153 --bowtie2out 'bowtie2out' | |
| 154 -s '$sam_output_file' | |
| 155 --biom '$biom_output_file' | |
| 156 --nproc \${GALAXY_SLOTS:-4} | |
| 157 | |
| ## Step 4 (raw reads only): move the intermediary mapping file onto the bowtie2out dataset. | |
| 158 #if $inputs.in.selector == "raw" | |
| 159 && | |
| 160 mv 'bowtie2out' '$bowtie2out' | |
| 161 #end if | |
| 162 ]]></command> | |
| 163 <inputs> | |
| 164 <section name="inputs" title="Inputs" expanded="true"> | |
| <!-- Conditional "in": raw reads (single, multiple or paired files), an external SAM file, or a bowtie2out file from a previous run. --> | |
| 165 <conditional name="in"> | |
| 166 <param name="selector" type="select" label="Input(s)"> | |
| 167 <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option> | |
| 168 <option value="sam">Externally BowTie2-mapped SAM file</option> | |
| 169 <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option> | |
| 170 </param> | |
| 171 <when value="raw"> | |
| 172 <conditional name="raw_in"> | |
| 173 <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads"> | |
| 174 <option value="single" selected="true">One single-end file</option> | |
| 175 <option value="multiple">Multiple single-end files</option> | |
| 176 <option value="paired">Paired-end files</option> | |
| 177 </param> | |
| 178 <when value="single"> | |
| 179 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/> | |
| 180 </when> | |
| 181 <when value="multiple"> | |
| 182 <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/> | |
| 183 </when> | |
| 184 <when value="paired"> | |
| 185 <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/> | |
| 186 <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/> | |
| 187 </when> | |
| 188 </conditional> | |
| 189 <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/> | |
| 190 <section name="mapping" title="Mapping" expanded="true"> | |
| <!-- The option values are passed verbatim to the command line, so they must be spelled exactly like the Bowtie2 preset names. --> | |
| 191 <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files"> | |
| 192 <option value="sensitive">Sensitive</option> | |
| 193 <option value="very-sensitive" selected="true">Very sensitive</option> | |
| 194 <option value="sensitive-local">Sensitive local</option> | |
| 195 <option value="very-sensitive-local">Very sensitive local</option> | |
| 196 </param> | |
| 197 <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/> | |
| 198 </section> | |
| 199 </when> | |
| 200 <when value="sam"> | |
| 201 <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenomic reads"/> | |
| 202 </when> | |
| 203 <when value="bowtie2out"> | |
| 204 <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run" | |
| 205 help="File needs to be generated with MetaPhlAn versions >3.0"/> | |
| 206 </when> | |
| 207 </conditional> | |
| <!-- Conditional "db": marker database taken from the metaphlan_database data table, or a FASTA plus JSON metadata from the history. --> | |
| 208 <conditional name="db"> | |
| 209 <param name="db_selector" type="select" label="Database with clade-specific marker genes"> | |
| 210 <option value="cached" selected="true">Locally cached</option> | |
| 211 <option value="history">From history</option> | |
| 212 </param> | |
| 213 <when value="cached"> | |
| 214 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select"> | |
| 215 <options from_data_table="metaphlan_database"> | |
| 216 <validator message="No MetaPhlAn database is available" type="no_options" /> | |
| 217 </options> | |
| 218 </param> | |
| 219 </when> | |
| 220 <when value="history"> | |
| 221 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> | |
| 222 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/> | |
| 223 </when> | |
| 224 </conditional> | |
| 225 </section> | |
| 226 <section name="analysis" title="Analysis" expanded="true"> | |
| <!-- Conditional "analysis_type": the selected value is given to the t option; some analysis types expose extra parameters. --> | |
| 227 <conditional name="analysis_type"> | |
| 228 <param argument="-t" type="select" label="Type of analysis to perform"> | |
| 229 <option value="rel_ab" selected="true">rel_ab: Profiling a metagenome in terms of relative abundances</option> | |
| 230 <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenome in terms of relative abundances and estimate the number of reads coming from each clade</option> | |
| 231 <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> | |
| 232 <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> | |
| 233 <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> | |
| 234 <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option> | |
| 235 <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> | |
| 236 <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option> | |
| 237 </param> | |
| 238 <when value="rel_ab"> | |
| 239 <expand macro="tax_lev"/> | |
| 240 </when> | |
| 241 <when value="rel_ab_w_read_stats"> | |
| 242 <expand macro="tax_lev"/> | |
| 243 </when> | |
| 244 <when value="reads_map"/> | |
| 245 <when value="clade_profiles"/> | |
| 246 <when value="clade_specific_strain_tracker"> | |
| 247 <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" | |
| 248 help="Markers are also extracted for subclades" /> | |
| 249 <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> | |
| 250 </when> | |
| 251 <when value="marker_ab_table"> | |
| 252 <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome" | |
| 253 help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/> | |
| 254 </when> | |
| 255 <when value="marker_counts"/> | |
| 256 <when value="marker_pres_table"> | |
| 257 <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> | |
| 258 </when> | |
| 259 </conditional> | |
| 260 <param argument="--min_cu_len" type="integer" value="2000" | |
| 261 label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> | |
| 262 <param argument="--min_alignment_len" type="integer" optional="true" | |
| 263 label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> | |
| <!-- Each selected value of this multi-select corresponds to a command-line flag of the same name. --> | |
| 264 <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile"> | |
| 265 <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> | |
| 266 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> | |
| 267 <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option> | |
| 268 <option value="ignore_archaea">Ignore archaea organisms (ignore_archaea)</option> | |
| 269 </param> | |
| 270 <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances"> | |
| 271 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option> | |
| 272 <option value="avg_l">avg_l: Average of length-normalized marker counts</option> | |
| 273 <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option> | |
| 274 <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option> | |
| 275 <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option> | |
| 276 <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option> | |
| 277 <option value="med">med: Median of length-normalized marker counts</option> | |
| 278 </param> | |
| 279 <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/> | |
| 280 <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/> | |
| 281 <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/> | |
| <!-- NOTE(review): checked="true" emits the flag by default, while the help text recommends keeping disambiguation on; confirm the intended default. --> | |
| 282 <param argument="--avoid_disqm" type='boolean' checked="true" truevalue='--avoid_disqm' falsevalue='' | |
| 283 label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" | |
| 284 help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> | |
| 285 </section> | |
| 286 <section name="out" title="Outputs" expanded="true"> | |
| <!-- Two free-text sample identifiers followed by four boolean switches; a checked switch renders its truevalue, an unchecked one renders an empty string. --> | |
| 287 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> | |
| 288 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> | |
| 289 <param argument="--use_group_representative" type="boolean" label="Use a species as representative for species groups?" checked="false" truevalue="--use_group_representative" falsevalue=""/> | |
| 291 <param name="legacy_output" argument="--legacy-output" type="boolean" label="Old MetaPhlAn2 two columns output?" checked="false" truevalue="--legacy-output" falsevalue=""/> | |
| 293 <param argument="--CAMI_format_output" type="boolean" label="Report the profiling using the CAMI output format?" checked="false" truevalue="--CAMI_format_output" falsevalue=""/> | |
| 295 <param argument="--unknown_estimation" type="boolean" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?" checked="false" truevalue="--unknown_estimation" falsevalue=""/> | |
| 297 </section> | |
| 298 </inputs> | |
| 299 <outputs> <!-- four datasets are declared; the two carrying a filter are only produced for raw-read input --> | |
| 300 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" /> | |
| 301 <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output"> | |
| 302 <filter>inputs['in']['selector'] == "raw"</filter> <!-- kept only when the input selector is "raw" --> | |
| 303 </data> | |
| 304 <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file"> | |
| 305 <filter>inputs['in']['selector'] == "raw"</filter> <!-- kept only when the input selector is "raw" --> | |
| 306 </data> | |
| 307 <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" /> | |
| 308 </outputs> | |
| 309 <tests> | |
| 310 <test expect_num_outputs="4"> <!-- Test 1: one gzip-compressed FASTA as single-end raw input, cached database; raw input yields all four outputs --> | |
| 311 <section name="inputs"> | |
| 312 <conditional name="in"> | |
| 313 <param name="selector" value="raw"/> | |
| 314 <conditional name="raw_in"> | |
| 315 <!-- Single GZ file: exercises the zcat branch of the command for a single-end input --> | |
| 316 <param name="selector" value="single"/> | |
| 317 <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> | |
| 318 </conditional> | |
| 319 <param name="read_min_len" value="70"/> | |
| 320 <section name="mapping"> | |
| 321 <param name="bt2_ps" value="sensitive"/> | |
| 322 <param name="min_mapq_val" value="5"/> | |
| 323 </section> | |
| 324 </conditional> | |
| 325 <conditional name="db"> | |
| 326 <!-- Cached db: entry picked from the metaphlan_database data table --> | |
| 327 <param name="db_selector" value="cached"/> | |
| 328 <param name="cached_db" value="test-db-20210409"/> | |
| 329 </conditional> | |
| 330 </section> | |
| 331 <section name="analysis"> | |
| 332 <param name="min_cu_len" value="2000"/> | |
| 333 <param name="organism_profiling" value="add_viruses"/> | |
| 334 <param name="stat" value="avg_g"/> | |
| 335 <param name="stat_q" value="0.2"/> | |
| 336 <param name="perc_nonzero" value="0.33"/> | |
| 337 <param name="avoid_disqm" value="true"/> | |
| 338 </section> | |
| 339 <section name="out"> | |
| 340 <param name="sample_id_key" value="SampleID"/> | |
| 341 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 342 <param name="use_group_representative" value="false"/> | |
| 343 <param name="legacy_output" value="false"/> | |
| 344 <param name="CAMI_format_output" value="false"/> | |
| 345 <param name="unknown_estimation" value="false"/> | |
| 346 </section> | |
| 347 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 348 <assert_contents> | |
| 349 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 350 </assert_contents> | |
| 351 </output> | |
| 352 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> | |
| 353 <assert_contents> | |
| 354 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
| 355 <has_text text="37637__U2I1U8__N579_01580"/> | |
| 356 </assert_contents> | |
| 357 </output> | |
| 358 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> | |
| 359 <assert_contents> | |
| 360 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
| 361 </assert_contents> | |
| 362 </output> | |
| 363 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 364 <assert_contents> | |
| 365 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 366 </assert_contents> | |
| 367 </output> | |
| 368 </test> | |
| 369 <test expect_num_outputs="4"> <!-- Test 2: several gzip-compressed single-end files with a database built from history datasets --> | |
| 370 <section name="inputs"> | |
| 371 <conditional name="in"> | |
| 372 <param name="selector" value="raw"/> | |
| 373 <conditional name="raw_in"> | |
| 374 <!-- Multiple GZ file: the same compressed FASTA is listed twice to exercise the multi-file loop --> | |
| 375 <param name="selector" value="multiple"/> | |
| 376 <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/> | |
| 377 </conditional> | |
| 378 <param name="read_min_len" value="70"/> | |
| 379 <section name="mapping"> | |
| 380 <param name="bt2_ps" value="sensitive"/> | |
| 381 <param name="min_mapq_val" value="5"/> | |
| 382 </section> | |
| 383 </conditional> | |
| 384 <conditional name="db"> | |
| 385 <!-- Local db: FASTA markers plus JSON metadata supplied as history datasets --> | |
| 386 <param name="db_selector" value="history"/> | |
| 387 <param name="bowtie2db" value="test-db.fasta"/> | |
| 388 <param name="mpa_pkl" value="test-db.json"/> | |
| 389 </conditional> | |
| 390 </section> | |
| 391 <section name="analysis"> | |
| 392 <param name="min_cu_len" value="2000"/> | |
| 393 <param name="organism_profiling" value="add_viruses"/> | |
| 394 <param name="stat" value="avg_g"/> | |
| 395 <param name="stat_q" value="0.2"/> | |
| 396 <param name="perc_nonzero" value="0.33"/> | |
| 397 <param name="avoid_disqm" value="true"/> | |
| 398 </section> | |
| 399 <section name="out"> | |
| 400 <param name="sample_id_key" value="SampleID"/> | |
| 401 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 402 <param name="use_group_representative" value="false"/> | |
| 403 <param name="legacy_output" value="false"/> | |
| 404 <param name="CAMI_format_output" value="false"/> | |
| 405 <param name="unknown_estimation" value="false"/> | |
| 406 </section> | |
| 407 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 408 <assert_contents> | |
| 409 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 410 <has_text text="relative_abundance"/> | |
| 411 <has_text text="NCBI_tax_id"/> | |
| 412 <has_text text="clade_name"/> | |
| 413 </assert_contents> | |
| 414 </output> | |
| 415 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> | |
| 416 <assert_contents> | |
| 417 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
| 418 <has_text text="37637__U2I1U8__N579_01580"/> | |
| 419 </assert_contents> | |
| 420 </output> | |
| 421 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> | |
| 422 <assert_contents> | |
| 423 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
| 424 </assert_contents> | |
| 425 </output> | |
| 426 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 427 <assert_contents> | |
| 428 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 429 </assert_contents> | |
| 430 </output> | |
| 431 </test> | |
| 432 <test expect_num_outputs="4"> <!-- Test 3: gzip-compressed paired-end input with the cached database --> | |
| 433 <section name="inputs"> | |
| 434 <conditional name="in"> | |
| 435 <param name="selector" value="raw"/> | |
| 436 <conditional name="raw_in"> | |
| 437 <!-- Paired GZ file: the same compressed FASTA serves as both forward and reverse file --> | |
| 438 <param name="selector" value="paired"/> | |
| 439 <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/> | |
| 440 <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/> | |
| 441 </conditional> | |
| 442 <param name="read_min_len" value="70"/> | |
| 443 <section name="mapping"> | |
| 444 <param name="bt2_ps" value="sensitive"/> | |
| 445 <param name="min_mapq_val" value="5"/> | |
| 446 </section> | |
| 447 </conditional> | |
| 448 <conditional name="db"> | |
| 449 <!-- Cached db: entry picked from the metaphlan_database data table --> | |
| 450 <param name="db_selector" value="cached"/> | |
| 451 <param name="cached_db" value="test-db-20210409"/> | |
| 452 </conditional> | |
| 453 </section> | |
| 454 <section name="analysis"> | |
| 455 <param name="min_cu_len" value="2000"/> | |
| 456 <param name="organism_profiling" value="add_viruses"/> | |
| 457 <param name="stat" value="avg_g"/> | |
| 458 <param name="stat_q" value="0.2"/> | |
| 459 <param name="perc_nonzero" value="0.33"/> | |
| 460 <param name="avoid_disqm" value="true"/> | |
| 461 </section> | |
| 462 <section name="out"> | |
| 463 <param name="sample_id_key" value="SampleID"/> | |
| 464 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 465 <param name="use_group_representative" value="false"/> | |
| 466 <param name="legacy_output" value="false"/> | |
| 467 <param name="CAMI_format_output" value="false"/> | |
| 468 <param name="unknown_estimation" value="false"/> | |
| 469 </section> | |
| 470 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 471 <assert_contents> | |
| 472 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 473 <has_text text="relative_abundance"/> | |
| 474 <has_text text="NCBI_tax_id"/> | |
| 475 <has_text text="clade_name"/> | |
| 476 </assert_contents> | |
| 477 </output> | |
| 478 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> | |
| 479 <assert_contents> | |
| 480 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
| 481 <has_text text="37637__U2I1U8__N579_01580"/> | |
| 482 </assert_contents> | |
| 483 </output> | |
| 484 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> | |
| 485 <assert_contents> | |
| 486 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
| 487 </assert_contents> | |
| 488 </output> | |
| 489 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 490 <assert_contents> | |
| 491 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 492 </assert_contents> | |
| 493 </output> | |
| 494 </test> | |
| 495 <test expect_num_outputs="2"> <!-- Test 4: SAM input; only the abundance table and the BIOM file are expected because the other two outputs are filtered to raw input --> | |
| 496 <section name="inputs"> | |
| 497 <conditional name="in"> | |
| 498 <!-- SAM: externally mapped reads are given directly, so no read or mapping parameters are set --> | |
| 499 <param name="selector" value="sam"/> | |
| 500 <param name="in" value="SRS014464-Anterior_nares.sam"/> | |
| 501 </conditional> | |
| 502 <conditional name="db"> | |
| 503 <!-- Cached db: entry picked from the metaphlan_database data table --> | |
| 504 <param name="db_selector" value="cached"/> | |
| 505 <param name="cached_db" value="test-db-20210409"/> | |
| 506 </conditional> | |
| 507 </section> | |
| 508 <section name="analysis"> | |
| 509 <param name="min_cu_len" value="2000"/> | |
| 510 <param name="organism_profiling" value="add_viruses"/> | |
| 511 <param name="stat" value="avg_g"/> | |
| 512 <param name="stat_q" value="0.2"/> | |
| 513 <param name="perc_nonzero" value="0.33"/> | |
| 514 <param name="avoid_disqm" value="true"/> | |
| 515 </section> | |
| 516 <section name="out"> | |
| 517 <param name="sample_id_key" value="SampleID"/> | |
| 518 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 519 <param name="use_group_representative" value="false"/> | |
| 520 <param name="legacy_output" value="false"/> | |
| 521 <param name="CAMI_format_output" value="false"/> | |
| 522 <param name="unknown_estimation" value="false"/> | |
| 523 </section> | |
| 524 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 525 <assert_contents> | |
| 526 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 527 <has_text text="relative_abundance"/> | |
| 528 <has_text text="NCBI_tax_id"/> | |
| 529 <has_text text="clade_name"/> | |
| 530 </assert_contents> | |
| 531 </output> | |
| 532 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 533 <assert_contents> | |
| 534 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 535 </assert_contents> | |
| 536 </output> | |
| 537 </test> | |
| 538 <test expect_num_outputs="2"> <!-- Test 5: bowtie2out input; only the abundance table and the BIOM file are expected because the other two outputs are filtered to raw input --> | |
| 539 <section name="inputs"> | |
| 540 <conditional name="in"> | |
| 541 <!-- bowtie2out: intermediary mapping file reused as input; this branch has no mapping section, so no mapping parameters are set --> | |
| 542 <param name="selector" value="bowtie2out"/> | |
| 543 <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> | |
| 544 </conditional> | |
| 545 <conditional name="db"> | |
| 546 <!-- Cached db: entry picked from the metaphlan_database data table --> | |
| 547 <param name="db_selector" value="cached"/> | |
| 548 <param name="cached_db" value="test-db-20210409"/> | |
| 549 </conditional> | |
| 550 </section> | |
| 559 <section name="analysis"> | |
| 560 <param name="min_cu_len" value="2000"/> | |
| 561 <param name="organism_profiling" value="add_viruses"/> | |
| 562 <param name="stat" value="avg_g"/> | |
| 563 <param name="stat_q" value="0.2"/> | |
| 564 <param name="perc_nonzero" value="0.33"/> | |
| 565 <param name="avoid_disqm" value="true"/> | |
| 566 </section> | |
| 567 <section name="out"> | |
| 568 <param name="sample_id_key" value="SampleID"/> | |
| 569 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 570 <param name="use_group_representative" value="false"/> | |
| 571 <param name="legacy_output" value="false"/> | |
| 572 <param name="CAMI_format_output" value="false"/> | |
| 573 <param name="unknown_estimation" value="false"/> | |
| 574 </section> | |
| 575 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 576 <assert_contents> | |
| 577 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 578 <has_text text="relative_abundance"/> | |
| 579 <has_text text="NCBI_tax_id"/> | |
| 580 <has_text text="clade_name"/> | |
| 581 </assert_contents> | |
| 582 </output> | |
| 583 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 584 <assert_contents> | |
| 585 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 586 </assert_contents> | |
| 587 </output> | |
| 588 </test> | |
| 589 <test expect_num_outputs="4"> | |
| 590 <section name="inputs"> | |
| 591 <conditional name="in"> | |
| 592 <param name="selector" value="raw"/> | |
| 593 <conditional name="raw_in"> | |
| 594 <!-- Raw reads given as one FASTA file (raw_in selector set to "single") --> | |
| 595 <param name="selector" value="single"/> | |
| 596 <param name="in" value="SRS014464-Anterior_nares.fasta"/> | |
| 597 </conditional> | |
| 598 <param name="read_min_len" value="70"/> | |
| 599 <section name="mapping"> | |
| 600 <param name="bt2_ps" value="sensitive"/> | |
| 601 <param name="min_mapq_val" value="5"/> | |
| 602 </section> | |
| 603 </conditional> | |
| 604 <conditional name="db"> | |
| 605 <!-- Reference database chosen from the cached entries (db_selector set to "cached") --> | |
| 606 <param name="db_selector" value="cached"/> | |
| 607 <param name="cached_db" value="test-db-20210409"/> | |
| 608 </conditional> | |
| 609 </section> | |
| 610 <section name="analysis"> | |
| 611 <param name="min_cu_len" value="2000"/> | |
| 612 <param name="organism_profiling" value="add_viruses"/> | |
| 613 <param name="stat" value="avg_g"/> | |
| 614 <param name="stat_q" value="0.2"/> | |
| 615 <param name="perc_nonzero" value="0.33"/> | |
| 616 <param name="ignore_markers" value="marker.txt"/> | |
| 617 <param name="avoid_disqm" value="true"/> | |
| 618 </section> | |
| 619 <section name="out"> | |
| 620 <param name="sample_id_key" value="SampleID"/> | |
| 621 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 622 <param name="use_group_representative" value="false"/> | |
| 623 <param name="legacy_output" value="true"/> | |
| 624 <param name="CAMI_format_output" value="false"/> | |
| 625 <param name="unknown_estimation" value="false"/> | |
| 626 </section> | |
| 627 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> | |
| 628 <assert_contents> | |
| 629 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 630 <has_text text="SampleID"/> | |
| 631 <has_text text="Metaphlan_Analysis"/> | |
| 632 </assert_contents> | |
| 633 </output> | |
| 634 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> | |
| 635 <assert_contents> | |
| 636 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
| 637 <has_text text="37637__U2I1U8__N579_01580"/> | |
| 638 </assert_contents> | |
| 639 </output> | |
| 640 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> | |
| 641 <assert_contents> | |
| 642 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
| 643 </assert_contents> | |
| 644 </output> | |
| 645 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 646 <assert_contents> | |
| 647 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 648 </assert_contents> | |
| 649 </output> | |
| 650 </test> | |
| 651 </tests> | |
| 652 <help><![CDATA[ | |
| 653 What it does | |
| 654 ============ | |
| 655 | |
| 656 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, | |
| 657 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution. | |
| 658 | |
| 659 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes | |
| 660 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: | |
| 661 | |
| 662 - unambiguous taxonomic assignments; | |
| 663 - accurate estimation of organismal relative abundance; | |
| 664 - species-level resolution for bacteria, archaea, eukaryotes and viruses; | |
| 665 - strain identification and tracking; | |
| 666 - orders of magnitude speedups compared to existing methods; | |
| 667 - metagenomic strain-level population genomics. | |
| 668 | |
| 669 MetaPhlAn clade-abundance estimation | |
| 670 ------------------------------------ | |
| 671 | |
| 672 The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and | |
| 673 strains in particular cases) present in the metagenome obtained from a microbiome sample and their | |
| 674 relative abundance. | |
| 675 | |
| 676 Marker level analysis | |
| 677 --------------------- | |
| 678 | |
| 679 MetaPhlAn introduces the capability of characterizing organisms at the strain level using | |
| 680 non-aggregated marker information. Such capability comes in several slightly different flavours and | |
| 681 is a way to perform strain tracking and comparison across multiple samples. | |
| 682 | |
| 683 Usually, MetaPhlAn is first run with the default parameters for the type of analysis to profile the | |
| 684 species present in the community, and then a strain-level profiling can be performed to zoom in on | |
| 685 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out | |
| 686 intermediate file saved during the execution of the default analysis type. | |
| 687 | |
| 688 Inputs | |
| 689 ====== | |
| 690 | |
| 691 MetaPhlAn takes as input either: | |
| 692 | |
| 693 - one or several sequence files in FASTA or FASTQ format (compressed or not) | |
| 694 - a Bowtie2-produced SAM file | |
| 695 - an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run | |
| 696 | |
| 697 It also needs a reference database, which can be locally installed or customized using the dedicated tools. | |
| 698 | |
| 699 Outputs | |
| 700 ======= | |
| 701 | |
| 702 The main output file is a tab-separated file with the predicted taxon relative abundances. | |
| 703 | |
| 704 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. | |
| 705 | |
| 706 | |
| 707 More help and use cases | |
| 708 ======================= | |
| 709 | |
| 710 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_. | |
| 711 | |
| 712 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage | |
| 713 | |
| 714 ]]></help> | |
| 715 <expand macro="citations"/> | |
| 716 </tool> |
