Mercurial > repos > matthias > dada2_dada
diff dada2_dada.xml @ 0:38959bdb0956 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
| author | matthias |
|---|---|
| date | Fri, 08 Mar 2019 06:45:59 -0500 |
| parents | |
| children | 249ba5cbeb6e |
line wrap: on
line diff
<tool id="dada2_dada" name="dada2: dada" version="@DADA2_VERSION@">
    <description>Remove sequencing errors</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
#if $batch_cond.batch_select == "no"
mkdir output &&
#end if
Rscript '$dada2_script' \${GALAXY_SLOTS:-1}
    ]]></command>
    <configfiles>
        <configfile name="dada2_script"><![CDATA[
## Cheetah-templated R script: run dada2::dada() on one or more dereplicated
## read sets. argv[1] is the thread count passed from the command line.
library(dada2, quietly=TRUE)

args <- commandArgs(trailingOnly = TRUE)
nthreads <- as.integer(args[1])

## Load dereplicated reads. Joint processing ("no") builds a named list keyed
## by element identifier; batch processing ("yes") loads a single object.
## The RDS payload lives in each dataset's extra_files_path as "Rdata".
#if $batch_cond.batch_select == "no"
derep <- list()
#for $d in $batch_cond.derep:
derep[["$d.element_identifier"]] <- readRDS(file.path('$d.extra_files_path', 'Rdata'))
#end for
#else
derep <- readRDS(file.path('$batch_cond.derep.extra_files_path', 'Rdata'))
#end if

## Error rates previously estimated with learnErrors
err <- readRDS(file.path('$errorrates.extra_files_path', "Rdata"))

## Pooling strategies only apply to joint processing; a batched job sees a
## single sample, so pooling is always off there.
#if $batch_cond.batch_select == "yes"
pool <- FALSE
#else
  #if $batch_cond.pool == "TRUE"
pool <- TRUE
  #else if $batch_cond.pool == "FALSE"
pool <- FALSE
  #else
pool <- 'pseudo'
  #end if
#end if

dada_result <- dada(derep, err, errorEstimationFunction = $errfoo, selfConsist = $selfconsist, pool = pool, multithread = nthreads)

## Joint processing returns a named list: write one RDS per sample into
## "output/" with a "dada2_dada" extension so the collection discovery
## pattern (__name_and_ext__) picks them up. Batch processing returns a
## single object written to the single output dataset.
#if $batch_cond.batch_select == "no"
for( id in names(dada_result) ){
    saveRDS(dada_result[[id]], file=file.path("output", paste(id, "dada2_dada", sep=".")))
}
#else
saveRDS(dada_result, file='$dada')
#end if
    ]]></configfile>
    </configfiles>
    <inputs>
        <conditional name="batch_cond">
            <param name="batch_select" type="select" label="Process samples in batches">
                <option value="no">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="yes">
                <param name="derep" type="data" format="dada2_derep" label="Dereplicated reads"/>
            </when>
            <when value="no">
                <param name="derep" type="data" multiple="true" format="dada2_derep" label="Dereplicated reads"/>
                <param name="pool" type="select" label="Pool samples">
                    <option value="FALSE">process samples individually</option>
                    <option value="TRUE">pool samples</option>
                    <option value="pseudo">pseudo pooling between individually processed samples</option>
                </param>
            </when>
        </conditional>
        <param name="errorrates" type="data" format="dada2_errorrates" label="Error rates"/>
        <expand macro="errorEstimationFunction"/>
        <param name="selfconsist" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Alternate between sample inference and error rate estimation until convergence"/>
    </inputs>
    <outputs>
        <!-- single dataset for batch mode; discovered collection for joint mode -->
        <data name="dada" format="dada2_dada">
            <filter>batch_cond['batch_select']=="yes"</filter>
        </data>
        <collection name="data_collection" type="list">
            <discover_datasets pattern="__name_and_ext__" directory="output"/>
            <filter>batch_cond['batch_select']=="no"</filter>
        </collection>
    </outputs>
    <help><![CDATA[
**Input:**

- A number of derep data sets computed with derepFastq (given as list or multiple data sets)
- An errorrates data set computed with learnErrors

You can decide to compute the data jointly or in batches.

- Jointly ("Process samples in batches" = no): A single Galaxy job is started that processes all derep data sets jointly. You may choose different pooling strategies: whether the dada job processes the samples individually, pooled, or pseudo pooled.
- In batches ("Process samples in batches" = yes): A separate Galaxy job is started for each derep data set. This is equivalent to joint processing and choosing to process samples individually.

While the single dada job (in case of joint processing) can use multiple cores on one compute node,
batched processing distributes the work on a number of jobs (equal to the number of input
derep data sets) where each can use multiple cores. Hence, if you intend to process the data
sets individually, batched processing is more efficient -- in particular if Galaxy has access to
a larger number of compute resources.
    ]]></help>
    <expand macro="citations"/>
</tool>
