diff dada2_dada.xml @ 5:4a770a261b16 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 990192685955e9cda0282e348c28ef6462d88a38
author matthias
date Sun, 05 May 2019 12:42:02 -0400
parents 0b3194ac6a95
children 812ec9c53638
--- a/dada2_dada.xml	Mon Apr 29 09:53:39 2019 -0400
+++ b/dada2_dada.xml	Sun May 05 12:42:02 2019 -0400
@@ -28,7 +28,7 @@
 derep <- readRDS('$batch_cond.derep')
 #end if
 
-err <- readRDS('$errorrates')
+err <- readRDS('$err')
 
 #if $batch_cond.batch_select == "yes":
 pool <- F
@@ -45,7 +45,7 @@
 ## not needed for end user:    errorEstimationFunction = $errfoo, selfConsist = $selfconsist,
     pool = pool, multithread = nthreads)
 
-    #if $batch_cond.batch_select == "no":
+#if $batch_cond.batch_select == "no":
     #if len($batch_cond.derep) > 1:
     for( id in names(dada_result) ){
         saveRDS(dada_result[[id]], file=file.path("output" ,paste(id, "dada2_dada", sep=".")))
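
For orientation, here is roughly what the configfile template renders to at run time once the $err fix above is in place. The file names are placeholders standing in for the Galaxy-supplied datasets, so treat this as a sketch rather than the exact generated script:

    library(dada2)

    ## placeholders for the dereplicated reads and the error-rate model supplied by Galaxy
    derep <- readRDS("derep.Rdata")         # dada2_derep dataset (or a list of them)
    err   <- readRDS("learnErrors.Rdata")   # dada2_errorrates dataset

    dada_result <- dada(derep, err = err, pool = FALSE, multithread = 4)

    dir.create("output", showWarnings = FALSE)
    if (inherits(dada_result, "dada")) {
        ## single sample: dada() returns one dada-class object
        saveRDS(dada_result, file = file.path("output", "sample.dada2_dada"))
    } else {
        ## several samples: dada() returns a named list, one element per sample
        for (id in names(dada_result)) {
            saveRDS(dada_result[[id]],
                    file = file.path("output", paste(id, "dada2_dada", sep = ".")))
        }
    }
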
@@ -71,15 +71,15 @@
     </configfiles>
     <inputs>
         <conditional name="batch_cond">
-            <param name="batch_select" type="select" label="Process samples in batches">
+            <param name="batch_select" type="select" label="Process samples in batches" help="process samples jointly (default) or in independent jobs (see also below)">
                 <option value="no">no</option>
                 <option value="yes">yes</option>
             </param>
             <when value="yes">
-                <param name="derep" type="data" format="dada2_derep" label="Dereplicated reads"/>
+                <param argument="derep" type="data" format="dada2_derep" label="Dereplicated reads"/>
             </when>
             <when value="no">
-                <param name="derep" type="data" multiple="true" format="dada2_derep" label="Dereplicated reads"/>
+                <param argument="derep" type="data" multiple="true" format="dada2_derep" label="Dereplicated reads"/>
                 <param argument="pool" type="select" label="Pool samples">
                     <option value="FALSE">process samples individually</option>
                     <option value="TRUE">pool samples</option>
@@ -87,7 +87,7 @@
                 </param>
             </when>
         </conditional>
-        <param name="errorrates" type="data" format="dada2_errorrates" label="Error rates"/>
+        <param argument="err" type="data" format="dada2_errorrates" label="Error rates"/>
         <!-- not needed for end user I guess
         <expand macro="errorEstimationFunction"/>
         <param name="selfconsist" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Alternate between sample inference and error rate estimation until convergence"/>-->
@@ -97,27 +97,46 @@
             <filter>batch_cond['batch_select']=="yes"</filter>
         </data>
         <collection name="data_collection" type="list">
-            <discover_datasets pattern="__name_and_ext__" directory="output" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.dada2_dada" format="dada2_dada" directory="output" />
             <filter>batch_cond['batch_select']=="no"</filter>
         </collection>
     </outputs>
     <tests>
+        <!-- default, non batch -->
         <test>
             <param name="batch_cond|batch_select" value="no"/>
             <param name="batch_cond|derep" value="derepFastq_F3D0_R1.Rdata" ftype="dada2_derep" />
-            <param name="errorrates" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
+            <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
             <output_collection name="data_collection" type="list">
                 <element name="derepFastq_F3D0_R1.Rdata" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
             </output_collection>
         </test>
+        <!-- default, batch -->
+        <test>
+            <param name="batch_cond|batch_select" value="yes"/>
+            <param name="batch_cond|derep" value="derepFastq_F3D0_R1.Rdata" ftype="dada2_derep" />
+            <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
+            <output name="dada" value="dada_F3D0_R1.Rdata" ftype="dada2_dada" />
+        </test>
+        <!-- creates the dada result for the reverse reads; used as test data elsewhere, not strictly required for testing this tool -->
         <test>
             <param name="batch_cond|batch_select" value="no"/>
             <param name="batch_cond|derep" value="derepFastq_F3D0_R2.Rdata" ftype="dada2_derep" />
-            <param name="errorrates" value="learnErrors_F3D0_R2.Rdata" ftype="dada2_errorrates" />
+            <param name="err" value="learnErrors_F3D0_R2.Rdata" ftype="dada2_errorrates" />
             <output_collection name="data_collection" type="list">
                 <element name="derepFastq_F3D0_R2.Rdata" file="dada_F3D0_R2.Rdata" ftype="dada2_dada"/>
             </output_collection>
         </test>
+        <!-- test non-default options -->
+        <test>
+            <param name="batch_cond|batch_select" value="no"/>
+            <param name="batch_cond|derep" value="derepFastq_F3D0_R1.Rdata" ftype="dada2_derep" />
+            <param name="batch_cond|pool" value="pseudo"/>
+            <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
+            <output_collection name="data_collection" type="list">
+                <element name="derepFastq_F3D0_R1.Rdata" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 Description
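
As a quick illustration of the discovery pattern introduced above (written as (?P&lt;name&gt;.+)\.dada2_dada in the XML because < and > must be escaped inside an attribute value): the script names each per-sample result after its derep dataset, e.g. output/derepFastq_F3D0_R1.Rdata.dada2_dada, and the pattern turns that file into a collection element named derepFastq_F3D0_R1.Rdata with the dada2_dada datatype explicitly assigned. A hypothetical one-liner to check the name capture (PCRE accepts the Python-style named group):

    sub("(?P<name>.+)\\.dada2_dada", "\\1", "derepFastq_F3D0_R1.Rdata.dada2_dada", perl = TRUE)
    ## [1] "derepFastq_F3D0_R1.Rdata"
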
@@ -140,7 +159,7 @@
 - Jointly ("Process samples in batches"=no): A single Galaxy job is started that processes all derep data sets jointly. You may choose among different pooling strategies: whether the dada job processes the samples individually, pooled, or pseudo-pooled.
 - In batches ("Process samples in batches"=yes): A separate Galaxy job is started for each derep data set. This is equivalent to joint processing with the samples processed individually.
 
-While the single dada job (in case of joint processing) can use multiple cores on one compute node, batched processing distributes the work on a number of jobs (equal to the number of input derep data sets) where each can use multiple cores. Hence, if you intend to or need to process the data sets individually, batched processing is more efficient -- in particular if Galaxy has access to a larger number of compute ressources.
+While the single dada job (in case of joint processing) can use multiple cores on one compute node, batched processing distributes the work across a number of jobs (one per input derep data set), each of which can use multiple cores. Hence, if you intend or need to process the data sets individually, batched processing is more efficient -- in particular if Galaxy has access to a larger number of compute resources.
 
 A typical use case for processing the samples individually is large data sets for which the pooled strategy needs too much time or memory.
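
A minimal sketch of how the three pooling strategies map onto dada2::dada() calls; the object names are placeholders, and in batch mode the tool effectively runs the pool = FALSE variant once per derep dataset in its own Galaxy job:

    library(dada2)

    ## placeholders for the inputs selected in Galaxy
    derep <- readRDS("derep_list.Rdata")     # one or more dada2_derep datasets
    err   <- readRDS("learnErrors.Rdata")    # dada2_errorrates dataset

    dd_individual <- dada(derep, err = err, pool = FALSE,    multithread = TRUE)  # per-sample inference
    dd_pooled     <- dada(derep, err = err, pool = TRUE,     multithread = TRUE)  # all samples pooled
    dd_pseudo     <- dada(derep, err = err, pool = "pseudo", multithread = TRUE)  # pseudo-pooling
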