changeset 1:ea0616fb5c2d draft

planemo upload for repository https://github.com/biobakery/Maaslin2 commit 62a738f626aee9c8e1f1c5cbd63a59b3390d4ed5
author iuc
date Wed, 26 Jun 2024 09:39:59 +0000
parents 377c2e0140b7
children ef9d04ea375b
files maaslin2.xml macros.xml
diffstat 2 files changed, 158 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/maaslin2.xml	Fri Nov 05 11:23:38 2021 +0000
+++ b/maaslin2.xml	Wed Jun 26 09:39:59 2024 +0000
@@ -4,8 +4,37 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="edam_ontology"/>
+    <expand macro="xrefs"/>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
+    
+## Galaxy type="data_column" parameters yield column indices, not header names,
+## but Maaslin2 is given the names. Look them up in the header row of the
+## metadata file with awk, so the file is only parsed when the job command runs.
+
+#if $fixed_effects
+#set idx = []
+#for $i in $fixed_effects:
+    #silent idx.append(f'${i}')
+#end for
+#set idx_for_awk = ','.join(idx)
+## idx_for_awk is the awk field list printed from line 1; OFS joins the names with commas
+fixed_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk}' '$input_metadata'` &&
+echo 'Assigned fixed effects as:' "\$fixed_effects" &&
+#end if
+
+
+#if $random_effects
+#set idx = []
+#for $i in $random_effects:
+    #silent idx.append(f'${i}')
+#end for
+#set idx_for_awk = ','.join(idx)
+## same header lookup as for the fixed effects, applied to the random-effect columns
+random_effects=`awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk}' '$input_metadata'` &&
+echo 'Assigned random effects as:' "\$random_effects" &&
+#end if
+
 ln -s '$input_data' 'input_data.tsv'
 &&
 ln -s '$input_metadata' 'input_metadata.tsv'
@@ -30,11 +59,15 @@
     --analysis_method '$additional_options.analysis_method'
 #end if
 #if $random_effects
-    --random_effects '$random_effects'
+    --random_effects \$random_effects
 #end if
 #if $fixed_effects
-    --fixed_effects '$fixed_effects'
+    --fixed_effects \$fixed_effects
 #end if
+#if $reference
+    --reference '$reference'
+#end if
+    
 #if $additional_options.correction
     --correction '$additional_options.correction'
 #end if
@@ -44,25 +77,24 @@
     --heatmap_first_n '$output.heatmap_first_n'
 #end if
     $output.plot_scatter
-    --cores \${GALAXY_SLOTS:-4}
+    --cores 1
     'input_data.tsv'
     'input_metadata.tsv'
     'outputFolder'
 &&
-cd outputFolder && mkdir -p figures/ && cp *.pdf figures
+zip -r out.zip outputFolder &&
+cd outputFolder && 
+mkdir -p figures/ && 
+cp *.pdf figures
+
+
     ]]></command>
     <inputs>
         <param name="input_data" type="data" format="tabular" label="Data (or features) file"/>
         <param name="input_metadata" type="data" format="tabular" label="Metadata file"/>
-        <param argument="--fixed_effects" type="select" multiple="true" optional="true" label="Interactions: Fixed effects" help="The fixed effects for the model, comma-delimited for multiple effects">
-            <option value="diagnosis" selected="true">diagnosis</option>
-            <option value="dysbiosisnonIBD" selected="true">dysbiosisnonIBD</option>
-            <option value="dysbiosisUC" selected="true">dysbiosisUC</option>
-            <option value="dysbiosisCD" selected="true">dysbiosisCD</option>
-            <option value="antibiotics" selected="true">antibiotics</option>
-            <option value="age" selected="true">age</option>
-        </param>        
-        <param argument="--random_effects" type="text" multiple="true" optional="true" label="Random effects" help="The random effects for the model,  comma-delimited for multiple effects"/>        
+        <param argument="--fixed_effects" type="data_column" data_ref="input_metadata" use_header_names="true"  multiple="true" optional="true" label="Interactions: Fixed effects" help="The fixed effects for the model, comma-delimited for multiple effects, Default value: All " />
+        <param argument="--random_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true" label="Random effects" help="The random effects for the model,  comma-delimited for multiple effects, Default: None" />
+        <param argument="--reference" type="text"  label="Reference" help="Reference for a variable with more than two levels provided as a string of 'variable,reference' comma delimited for multiple variables. " />
         <section name="additional_options" title="Additional Options" expanded="true">
             <param argument="--min_abundance" type="float" value="0.0" optional="true" label="Minimum abundance" help="The minimum abundance for each feature"/>
             <param argument="--min_prevalence" type="float" value="0.1" optional="true" label="Minimum prevalence" help="The minimum percent of samples for which a feature is detected at minimum abundance"/>
@@ -86,7 +118,14 @@
                 <option value="NEGBIN">NEGBIN</option>
                 <option value="ZINB">ZINB</option>
             </param>
-            <param argument="--correction" type="text" value="BH" optional="true" label="Correction" help="The correction method for computing  the q-value"/>
+            <param argument="--correction" type="select" value="BH" optional="true" label="Correction" help="The correction method for computing  the q-value, Default: BH ">
+                <option value="BH" selected="true">Benjamini-Hochberg(BH)</option>
+                <option value="BY">Benjamini-Yekutieli(BY)</option>
+                <option value="Bonferroni">Bonferroni</option>
+                <option value="Holm">Holm</option>
+                <option value="Hochberg">Hochberg</option>
+                <option value="Hommel">Hommel</option>
+            </param>
             <param argument="--standardize" type="boolean" truevalue="--standardize TRUE" falsevalue="--standardize FALSE" checked="true" label="Apply z-score so continuous metadata are on  the same scale"/>
         </section>          
         <section name="output" title="Set Plotting Output" expanded="true">
@@ -96,10 +135,11 @@
             <param name="residuals_output" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Output data frame with residuals for each feature"/>    
         </section>   
    </inputs>
-   <outputs>    
+   <outputs>
+        <data name="archive_output" format="zip" from_work_dir="out.zip" label="${tool.name} on ${on_string}: zip of the complete output" />
         <data name="all_results" format="tabular" from_work_dir="outputFolder/all_results.tsv" label="All results ordered by increasing q-value"/>
         <data name="significant_results" format="tabular" from_work_dir="outputFolder/significant_results.tsv" label="Q-values smaller than or equal to the threshold"/>
-        <data name="residuals" format="rdata" from_work_dir="outputFolder/residuals.rds" label="Data frame with residuals for each feature">       
+        <data name="residuals" format="rdata" from_work_dir="outputFolder/fits/residuals.rds" label="Data frame with residuals for each feature">       
             <filter>output['residuals_output'] is True</filter>
         </data>         
         <data format="pdf" name="headmap" from_work_dir="outputFolder/figures/heatmap.pdf" label="Heatmap of the significant associations" >
@@ -111,11 +151,12 @@
         </collection>
     </outputs>
     <tests>
-        <test expect_num_outputs="5">
+        <test expect_num_outputs="6">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="random_effects" value="site,subject"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age"/>
+            <param name="random_effects" value= "2,5"/> 
+            <param name="fixed_effects" value="4,9,10,11,6,3"/> 
+            <param name="reference" value="diagnosis,nonIBD"/>
             <section name="additional_options">
                 <param name="min_abundance" value="0.0"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -132,6 +173,11 @@
                 <param name="plot_scatter" value="true"/>
                 <param name="residuals_output" value="true"/>
             </section>
+            <output name="archive_output">
+                <assert_contents>
+                    <has_size value="15005328" delta="1000000" />
+                </assert_contents>
+            </output>
             <output name="all_results">
                 <assert_contents>
                     <has_text text="feature"/>
@@ -142,13 +188,13 @@
             <output name="significant_results">
                 <assert_contents>
                     <has_text text="dysbiosisCD"/>
-                    <has_n_lines n="159"/>
+                    <has_n_lines n="159" delta="10"/>
                     <has_n_columns n="9"/>
                 </assert_contents>
             </output>
             <output name="residuals">
                 <assert_contents>
-                    <has_size value="462746" delta="1000"/>
+                    <has_size value="462386" />
                 </assert_contents>
             </output>
             <output name="headmap">
@@ -194,10 +240,11 @@
                 </element>                                                                        
             </output_collection>
         </test>
-        <test expect_num_outputs="5">
+        <test expect_num_outputs="6">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
+            <param name="fixed_effects" value="4,9"/>
+            <param name="reference" value="diagnosis,nonIBD"/>
             <section name="additional_options">
                 <param name="min_abundance" value="0.0"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -214,6 +261,11 @@
                 <param name="plot_scatter" value="true"/>
                 <param name="residuals_output" value="true"/>
             </section>
+            <output name="archive_output">
+                <assert_contents>
+                    <has_size value="12630049" delta="1000000" />
+                </assert_contents>
+            </output>
             <output name="all_results">
                 <assert_contents>
                     <has_text text="feature"/>
@@ -224,13 +276,13 @@
             <output name="significant_results">
                 <assert_contents>
                     <has_text text="diagnosis"/>
-                    <has_n_lines n="175"/>
+                    <has_n_lines n="175" delta="5"/>
                     <has_n_columns n="9"/>
                 </assert_contents>
             </output>
             <output name="residuals">
                 <assert_contents>
-                    <has_size value="367224" delta="1000"/>
+                    <has_size value="366875"/>
                 </assert_contents>
             </output>
             <output_collection name="figures_pdfs" type="list">
@@ -241,10 +293,11 @@
                 </element>                                                              
             </output_collection>
         </test>
-        <test expect_num_outputs="5">
+        <test expect_num_outputs="6">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
+            <param name="fixed_effects" value="2,4"/>
+            <param name="reference" value="site,Cedars-Sinai,diagnosis,UC"/>
             <section name="additional_options">
                 <param name="min_abundance" value="0.0001"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -261,34 +314,39 @@
                 <param name="plot_scatter" value="true"/>
                 <param name="residuals_output" value="true"/>
             </section>
+            <output name="archive_output">
+                <assert_contents>
+                    <has_size value="18278259" delta="1000000" />
+                </assert_contents>
+            </output>
             <output name="all_results">
                 <assert_contents>
                     <has_text text="feature"/>
-                    <has_n_lines n="250"/>
+                    <has_n_lines n="415" delta="10"/>
                     <has_n_columns n="9"/>
                 </assert_contents>
             </output>
             <output name="significant_results">
                 <assert_contents>
                     <has_text text="diagnosis"/>
-                    <has_n_lines n="172"/>
+                    <has_n_lines n="300" delta="15"/>
                     <has_n_columns n="9"/>
                 </assert_contents>
             </output>
             <output name="residuals">
                 <assert_contents>
-                    <has_size value="359943" delta="1000"/>
+                    <has_size value="363118"/>
                 </assert_contents>
             </output>
             <output name="headmap">
                 <assert_contents>
-                    <has_size value="6554" delta="1000" />
+                    <has_size value="7000" delta="1000" />
                 </assert_contents>
             </output>
             <output_collection name="figures_pdfs" type="list">
                 <element name="heatmap.pdf" ftype="pdf">
                     <assert_contents>
-                        <has_size value="6554" delta="1000" />
+                        <has_size value="7693" delta="100" />
                     </assert_contents>
                 </element>
                 <element name="diagnosis.pdf" ftype="pdf">
@@ -296,13 +354,65 @@
                         <has_size value="6061545" delta="1000000" />
                     </assert_contents>
                 </element>
-                <element name="dysbiosisnonIBD.pdf" ftype="pdf">
+                                                                        
+            </output_collection>
+        </test> 
+        <test expect_num_outputs="6">  
+      	<param name="input_data" value="HMP2_taxonomy.tsv"/>
+            <param name="input_metadata" value="HMP2_metadata.tsv"/>
+            <param name="fixed_effects" value="7,9"/>
+            <param name="random_effects" value="3" />
+          
+            <section name="additional_options">
+                <param name="min_abundance" value="0.0"/>
+                <param name="min_prevalence" value="0.1"/>
+                <param name="max_significance" value="0.25"/>
+                <param name="normalization" value="TSS"/>
+                <param name="transform" value="LOG"/>
+                <param name="analysis_method" value="LM"/>
+                <param name="correction" value="BY"/>
+                <param name="standardize" value="True"/>
+            </section>
+            <section name="output">
+                <param name="plot_heatmap" value="true"/>
+                <param name="heatmap_first_n" value="50"/>
+                <param name="plot_scatter" value="true"/>
+                <param name="residuals_output" value="true"/>
+            </section>
+            <output name="archive_output">
+                <assert_contents>
+                    <has_size value="8567935" delta="1000000" />
+                </assert_contents>
+            </output>
+            <output name="all_results">
+                <assert_contents>
+                    <has_text text="feature"/>
+                    <has_n_lines n="175" delta="10"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="significant_results">
+                <assert_contents>
+                    <has_text text="dysbiosisnonIBD"/>
+                    <has_n_lines n="95" delta="5"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="residuals">
+                <assert_contents>
+                    <has_size value="434087"/>
+                </assert_contents>
+            </output>
+            <output_collection name="figures_pdfs" type="list">
+                <element name="heatmap.pdf" ftype="pdf">
                     <assert_contents>
-                        <has_size value="2599373" delta="1000000" />
+                        <has_size value="7000" delta="1000" />
                     </assert_contents>
-                </element>                                                                    
+                </element>                                                              
             </output_collection>
-        </test>   
+        </test> 
+       
+            
     </tests>
     <help><![CDATA[
 @HELP_HEADER@
@@ -346,6 +456,9 @@
         - It only includes associations with q-values <= to the threshold.
     - Data frame with residuals for each feature (R data file)
         - This file contains a data frame with residuals for each feature.
+        
+The correction methods available for computing the q-value are described in the R p.adjust documentation: https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/p.adjust
+
 2- Visualization output files
     - Heatmap of the significant associations (PDF file)
         - This file contains a heatmap of the significant associations.
--- a/macros.xml	Fri Nov 05 11:23:38 2021 +0000
+++ b/macros.xml	Wed Jun 26 09:39:59 2024 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">0.99.12</token>
+    <token name="@TOOL_VERSION@">1.16.0</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="edam_ontology">
@@ -10,10 +10,16 @@
             <edam_topic>topic_3305</edam_topic>
         </edam_topics>           
     </xml>
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">maaslin2</xref>
+        </xrefs>
+    </xml>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">maaslin2</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-maaslin2</requirement>
+            <requirement type="package" version="3.0">zip</requirement>
             <yield/>
         </requirements>
     </xml>
- </macros>
\ No newline at end of file
+ </macros>