Mercurial > repos > iuc > metaphlan
diff metaphlan.xml @ 8:1416b7c401a3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 671a5fc6d4c02bd3eb830c1886a31ecffd134ceb
| author | iuc |
|---|---|
| date | Sun, 11 Aug 2024 20:34:37 +0000 |
| parents | 11136e6b78f2 |
| children | f0ca613c512a |
line wrap: on
line diff
--- a/metaphlan.xml Thu Apr 20 11:25:18 2023 +0000 +++ b/metaphlan.xml Sun Aug 11 20:34:37 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> +<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@"> <description>to profile the composition of microbial communities</description> <macros> <import>macros.xml</import> @@ -15,8 +15,7 @@ <option value="s">Species only</option> </param> <when value="a"> - <param name="split_levels" type='boolean' checked="false" truevalue='true' falsevalue='false' - label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/> + <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/> </when> <when value="k"/> <when value="p"/> @@ -38,14 +37,12 @@ #set full_ext=$inputs.in.raw_in.in.datatype.file_ext #if $full_ext.endswith("gz") #set $file_path="in" -zcat '$inputs.in.raw_in.in' > '$file_path' -&& + zcat '$inputs.in.raw_in.in' > '$file_path' && #else if $full_ext.endswith("bz2") #set $file_path="in" -bzcat '$inputs.in.raw_in.in' > '$file_path' -&& + bzcat '$inputs.in.raw_in.in' > '$file_path' && #else - #set $file_path=$inputs.in.raw_in.in + #set $file_path="'%s'" % $inputs.in.raw_in.in #end if #else if $inputs.in.raw_in.selector == "multiple" #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext @@ -53,45 +50,54 @@ #set sep="" #for $i, $f in enumerate($inputs.in.raw_in.in) #if $f.datatype.file_ext != $full_ext -echo "Different datatypes for input files" -&& -exit 1 + echo "Different datatypes for input files" && + exit 1 #end if #if $full_ext.endswith("gz") #set fp="input_%s" % ($i) -zcat '$f' > '$fp' -&& + zcat '$f' > '$fp' && #else if $full_ext.endswith("bz2") #set fp="input_%s" % ($i) -bzcat '$f' > '$fp' -&& + bzcat '$f' > '$fp' && 
#else #set fp=$f #end if - #set $file_path+="%s%s" % ($sep, $fp) + #set $file_path+="'%s%s'" % ($sep, $fp) #set $sep="," #end for #else if $inputs.in.raw_in.selector == "paired" #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext -echo "Different datatypes for input paired-end files" -&& -exit 1 + echo "Different datatypes for input paired-end files" && + exit 1 #end if #if $full_ext.endswith("gz") -zcat '$inputs.in.raw_in.in_f' > 'in_f' -&& -zcat '$inputs.in.raw_in.in_r' > 'in_r' -&& - #set file_path="in_f,in_r" + zcat '$inputs.in.raw_in.in_f' > 'in_f' && + zcat '$inputs.in.raw_in.in_r' > 'in_r' && + #set file_path="-1 in_f -2 in_r" #else if $full_ext.endswith("bz2") -bzcat '$inputs.in.raw_in.in_f' > 'in_f' -&& -bzcat '$inputs.in.raw_in.in_r' > 'in_r' -&& - #set file_path="in_f,in_r" + bzcat '$inputs.in.raw_in.in_f' > 'in_f' && + bzcat '$inputs.in.raw_in.in_r' > 'in_r' && + #set file_path="-1 in_f -2 in_r" #else - #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r) + #set file_path="-1 '%s' -2 '%s'" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r) + #end if + #else if $inputs.in.raw_in.selector == "paired_collection" + #set full_ext=$inputs.in.raw_in.in.forward.ext + #if $full_ext != $inputs.in.raw_in.in.reverse.ext + echo "Different datatypes for input paired-end files" && + exit 1 + #end if + #if $full_ext.endswith("gz") + zcat '$inputs.in.raw_in.in.forward' > 'in_f' && + zcat '$inputs.in.raw_in.in.reverse' > 'in_r' && + #set file_path="-1 in_f -2 in_r" + #else if $full_ext.endswith("bz2") + bzcat '$inputs.in.raw_in.in.forward' > 'in_f' && + bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' && + #set file_path="-1 in_f -2 in_r" + #else + #set file_path="-1 '%s' -2 '%s'" % ($inputs.in.raw_in.in.forward,$inputs.in.raw_in.in.reverse) #end if #end if @@ -105,20 +111,18 @@ #end if #if $inputs.db.db_selector == "history" -mkdir 'ref_db' -&& -bowtie2-build --large-index '$inputs.db.bowtie2db' 
'ref_db/custom_db' -&& -python '$__tool_directory__/customizemetadata.py' +mkdir 'ref_db' && +bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' && +python + '$__tool_directory__/customizemetadata.py' transform_json_to_pkl --json '$inputs.db.mpa_pkl' - --pkl 'ref_db/custom_db.pkl' -&& + --pkl 'ref_db/custom_db.pkl' && #end if metaphlan #if $inputs.in.selector == "raw" - '$file_path' + $file_path --input_type '$ext' --read_min_len $inputs.in.read_min_len --bt2_ps '$inputs.in.mapping.bt2_ps' @@ -180,12 +184,34 @@ $out.use_group_representative $out.legacy_output $out.CAMI_format_output - $out.unknown_estimation + $out.unclassified_estimation -o '$output_file' --bowtie2out 'bowtie2out' -s '$sam_output_file' --biom '$biom_output_file' --nproc \${GALAXY_SLOTS:-4} +#if $viral_analysis.profile_vsc + $viral_analysis.profile_vsc + --vsc_out '$vcs_breath_coverage' + --vsc_breadth $viral_analysis.vsc_breadth +#end if + +#if $subsample.selector != "no" + #if $subsample.selector == "single" + --subsampling $subsample.subsampling + #else + --subsampling_paired $subsample.subsampling_paired + #end if + $subsample.mapping_subsampling + #if $subsample.subsampling_seed + --subsampling_seed $subsample.subsampling_seed + #end if + --subsampling_output subsampled.out +#end if + +#if $test == "false" + --offline +#end if #if $inputs.in.selector == "raw" && @@ -227,13 +253,17 @@ <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads"> <option value="single" selected="true">One single-end file</option> <option value="multiple">Multiple single-end files</option> + <option value="paired_collection">Paired-end collection</option> <option value="paired">Paired-end files</option> </param> <when value="single"> <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/> </when> <when value="multiple"> - <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end 
Fasta/FastQ files with microbiota reads"/> + <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/> + </when> + <when value="paired_collection"> + <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/> </when> <when value="paired"> <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/> @@ -255,8 +285,7 @@ <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/> </when> <when value="bowtie2out"> - <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" - help="File needs to be generated with MetaPhlAn versions >3.0"/> + <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions >3.0"/> </when> </conditional> <conditional name="db"> @@ -286,7 +315,7 @@ <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> - <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> + <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> <option value="marker_counts">marker_counts: Non-normalized marker counts 
(use with extreme caution)</option> <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th</option> </param> @@ -299,24 +328,20 @@ <when value="reads_map"/> <when value="clade_profiles"/> <when value="clade_specific_strain_tracker"> - <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" - help="Markers are also extracted for subclades" /> + <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/> <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> </when> <when value="marker_ab_table"> - <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" - help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/> + <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. 
No normalization applied if the value is not specified"/> </when> <when value="marker_counts"/> <when value="marker_pres_table"> <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> </when> </conditional> - <param argument="--min_cu_len" type="integer" value="2000" - label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> - <param argument="--min_alignment_len" type="integer" optional="true" - label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> - <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile"> + <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> + <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> + <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true"> <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option> @@ -334,48 +359,81 @@ <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/> <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/> <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/> - <param 
argument="--avoid_disqm" type='boolean' checked="true" truevalue='--avoid_disqm' falsevalue='' - label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" - help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> + <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> </section> + <conditional name="subsample"> + <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input"> + <option value="no">No</option> + <option value="single">Yes: specify number of reads</option> + <option value="paired">Yes: specify number of paired reads</option> + </param> + <when value="no"/> + <when value="single"> + <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/> + <expand macro="subsample_common"/> + </when> + <when value="paired"> + <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. 
For N there will be floor(N/2) reads selected from the forward and reverse reads each."/> + <expand macro="subsample_common"/> + </when> + </conditional> + <conditional name="viral_analysis"> + <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach"> + <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option> + <option value="" selected="true">No</option> + </param> + <when value="--profile_vsc"> + <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/> + </when> + <when value=""/> + </conditional> <section name="out" title="Outputs" expanded="true"> <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> - <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue='' - label="Use a species as representative for species groups?"/> - <param argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' - label="Old MetaPhlAn2 two columns output?"/> - <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue='' - label="Report the profiling using the CAMI output format?"/> - <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue='' - label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> - <param name="krona_output" type='boolean' checked="false" truevalue='true' falsevalue='false' label="Output for Krona?"/> + <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" 
label="Use a species as representative for species groups?"/> + <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/> + <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/> + <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> + <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/> </section> + <!-- enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB) --> + <param name="test" type="hidden" value="false"/> </inputs> <outputs> - <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" /> + <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/> <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output"> <filter>inputs['in']['selector'] == "raw"</filter> </data> <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file"> <filter>inputs['in']['selector'] == "raw"</filter> </data> - <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" /> - <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels" > + <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/> + <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels"> 
<discover_datasets pattern="(?P<designation>.+)" directory="split_levels/" format="tabular"/> <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter> </collection> <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona"> <filter>out['krona_output']</filter> </data> + <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage"> + <filter>viral_analysis['profile_vsc']</filter> + </data> + <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads"> + <filter>subsample['selector'] == 'single'</filter> + </data> + <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads"> + <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/> + <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/> + <filter>subsample['selector'] == 'paired'</filter> + </collection> </outputs> <tests> + <!-- Single GZ file, Cached db --> <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> - <!-- Single GZ file --> <param name="selector" value="single"/> <param name="in" value="no_taxon_input.fasta"/> </conditional> @@ -386,7 +444,6 @@ </section> </conditional> <conditional name="db"> - <!-- Cached db --> <param name="db_selector" value="cached"/> <param name="cached_db" value="test-db-20210409"/> </conditional> @@ -412,7 +469,7 @@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" 
value="false"/> <param name="krona_output" value="true"/> </section> <output name="output_file" ftype="tabular"> @@ -516,13 +573,16 @@ <has_size value="1" delta="1"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> + <!-- Single GZ file, Cached db --> <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> - <!-- Single GZ file --> <param name="selector" value="single"/> <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> </conditional> @@ -533,7 +593,6 @@ </section> </conditional> <conditional name="db"> - <!-- Cached db --> <param name="db_selector" value="cached"/> <param name="cached_db" value="test-db-20210409"/> </conditional> @@ -559,7 +618,7 @@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" value="false"/> <param name="krona_output" value="true"/> </section> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> @@ -583,7 +642,7 @@ <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> </assert_contents> </output> - <output_collection name="levels" type="list" > + <output_collection name="levels" type="list"> <element name="all" ftype="tabular"> <assert_contents> <has_text text="Gammaproteobacteria"/> @@ -661,13 +720,16 @@ <has_n_columns n="9"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> + <!-- Multiple GZ file, Local db--> <test expect_num_outputs="4"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> - <!-- Multiple GZ 
file --> <param name="selector" value="multiple"/> <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/> </conditional> @@ -678,7 +740,6 @@ </section> </conditional> <conditional name="db"> - <!-- Local db --> <param name="db_selector" value="history"/> <param name="bowtie2db" value="test-db.fasta"/> <param name="mpa_pkl" value="test-db.json"/> @@ -705,7 +766,7 @@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> @@ -732,16 +793,19 @@ <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> - <test expect_num_outputs="4"> + <!-- Paired GZ file, Cached db (note: subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) --> + <test expect_num_outputs="7"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> - <!-- Paired GZ file --> <param name="selector" value="paired"/> - <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/> - <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/> + <param name="in_f" value="SRS014464-Anterior_nares.fastq.gz"/> + <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/> </conditional> <param name="read_min_len" value="70"/> <section name="mapping"> @@ -750,7 +814,183 @@ </section> </conditional> <conditional name="db"> - <!-- Cached db --> + <param name="db_selector" 
value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <conditional name="subsample"> + <param name="selector" value="paired"/> + <param name="subsampling_paired" value="20257"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> + <assert_contents> + <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/> + <has_text text="90240__A0A378QWM4__NCTC12877_00123"/> + </assert_contents> + </output> + <output name="sam_output_file" ftype="sam"> + <assert_contents> + 
<has_size min="52400" max="52600"/> + <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <output_collection name="subsample_paired" type="paired"> + <element name="forward"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + <element name="reverse"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + </output_collection> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Paired GZ file as collection, Cached db (note: subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) --> + <test expect_num_outputs="7"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="paired_collection"/> + <param name="in"> + <collection type="paired" name="pair"> + <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/> + <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/> + </collection> + </param> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param 
name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <conditional name="subsample"> + <param name="selector" value="paired"/> + <param name="subsampling_paired" value="20257"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> + <assert_contents> + <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/> + <has_text text="90240__A0A378QWM4__NCTC12877_00123"/> + </assert_contents> + </output> + <output name="sam_output_file" ftype="sam"> + <assert_contents> + <has_size min="52400" max="52600"/> + <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + 
<assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <output_collection name="subsample_paired" type="paired"> + <element name="forward"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + <element name="reverse"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + </output_collection> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- SAM, cached DB --> + <test expect_num_outputs="2"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="sam"/> + <param name="in" value="SRS014464-Anterior_nares.sam"/> + </conditional> + <conditional name="db"> <param name="db_selector" value="cached"/> <param name="cached_db" value="test-db-20210409"/> </conditional> @@ -776,69 +1016,7 @@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> - <param name="krona_output" value="false"/> - </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> - <has_text text="relative_abundance"/> - <has_text text="NCBI_tax_id"/> - <has_text text="clade_name"/> - </assert_contents> - </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> - <assert_contents> - <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> - <has_text text="37637__U2I1U8__N579_01580"/> - </assert_contents> - 
</output> - <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> - <assert_contents> - <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> - </assert_contents> - </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> - </test> - <test expect_num_outputs="2"> - <section name="inputs"> - <conditional name="in"> - <!-- SAM --> - <param name="selector" value="sam"/> - <param name="in" value="SRS014464-Anterior_nares.sam"/> - </conditional> - <conditional name="db"> - <!-- Cached db --> - <param name="db_selector" value="cached"/> - <param name="cached_db" value="test-db-20210409"/> - </conditional> - </section> - <section name="analysis"> - <conditional name="analysis_type"> - <param name="t" value="rel_ab"/> - <conditional name="tax_lev"> - <param name="tax_lev" value="a"/> - <param name="split_levels" value="false"/> - </conditional> - </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> - <param name="stat" value="avg_g"/> - <param name="stat_q" value="0.2"/> - <param name="perc_nonzero" value="0.33"/> - <param name="avoid_disqm" value="true"/> - </section> - <section name="out"> - <param name="sample_id_key" value="SampleID"/> - <param name="sample_id" value="Metaphlan_Analysis"/> - <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> - <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" 
compare="sim_size"> @@ -854,16 +1032,18 @@ <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> + <!-- bowtie2out, cached DB --> <test expect_num_outputs="2"> <section name="inputs"> <conditional name="in"> - <!-- bowtie2out --> <param name="selector" value="bowtie2out"/> <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> </conditional> <conditional name="db"> - <!-- Cached db --> <param name="db_selector" value="cached"/> <param name="cached_db" value="test-db-20210409"/> </conditional> @@ -897,7 +1077,7 @@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> @@ -913,13 +1093,16 @@ <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> + <!-- Single FASTA file, Cached db --> <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> - <!-- Single FASTA file --> <param name="selector" value="single"/> <param name="in" value="SRS014464-Anterior_nares.fasta"/> </conditional> @@ -930,7 +1113,6 @@ </section> </conditional> <conditional name="db"> - <!-- Cached db --> <param name="db_selector" value="cached"/> <param name="cached_db" value="test-db-20210409"/> </conditional> @@ -957,7 +1139,7 
@@ <param name="use_group_representative" value="false"/> <param name="legacy_output" value="true"/> <param name="CAMI_format_output" value="false"/> - <param name="unknown_estimation" value="false"/> + <param name="unclassified_estimation" value="false"/> <param name="krona_output" value="true"/> </section> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> @@ -983,7 +1165,7 @@ <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> </assert_contents> </output> - <output_collection name="levels" type="list" > + <output_collection name="levels" type="list"> <element name="all" ftype="tabular"> <assert_contents> <has_text text="Gammaproteobacteria"/> @@ -1054,20 +1236,24 @@ <has_n_columns n="9"/> </assert_contents> </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> </test> - <!-- Check a non-default analysis mode --> - <test expect_num_outputs="4"> + <!-- Check a non-default analysis mode + and viral analysis --> + <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> <conditional name="raw_in"> <param name="selector" value="single"/> - <param name="in" value="SRS014464-Anterior_nares.fasta"/> + <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/> </conditional> </conditional> <conditional name="db"> <param name="db_selector" value="cached"/> - <param name="cached_db" value="test-db-20210409"/> + <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/> </conditional> </section> <section name="analysis"> @@ -1075,13 +1261,44 @@ <param name="t" value="marker_ab_table"/> </conditional> </section> + <conditional name="viral_analysis"> + <param name="profile_vsc" value="--profile_vsc"/> + <param name="vsc_out" value="true"/> + </conditional> + <conditional name="subsample"> + <param name="selector" 
value="single"/> + <param name="subsampling" value="10000"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <param name="test" value="true"/> <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> <assert_contents> - <has_text text="29394__H3NC06__B8A41_08715"/> + <has_text text="SGB7017__MKDPKOFL_00679"/> <has_text text="SampleID"/> <has_text text="Metaphlan_Analysis"/> </assert_contents> </output> + <output name="subsample_single"> + <assert_contents> + <has_text text="@" n="10000"/> + </assert_contents> + </output> + <!-- The reference data is empty, so this output is expected to be empty --> + <output name="vcs_breath_coverage" ftype="tabular"> + <assert_contents> + <has_size size="0"/> + </assert_contents> + </output> + <assert_command> + <has_text text="--profile_vsc"/> + <has_text text="--vsc_breadth 0.75"/> + <has_text text="--vsc_out"/> + </assert_command> + <assert_stderr> + <has_text text="Downloading"/> + <!-- Because test=true is set and the TOY reference DB is absent, MetaPhlAn will download about 10MB of data, hence the "Downloading" message expected above --> + <has_text text="No reads aligning to VSC markers"/> + </assert_stderr> </test> </tests> <help><