view dada2_makeSequenceTable.xml @ 1:f2abdc9a849c draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
author matthias
date Fri, 08 Mar 2019 08:43:09 -0500
parents 98e24c66eeb2
children d2e7c5f8a9f7
line wrap: on
line source

<tool id="dada2_makeSequenceTable" name="dada2: makeSequenceTable" version="@DADA2_VERSION@">
    <description>construct a sequence table (analogous to OTU table)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
    Rscript '$dada2_script'
    ]]></command>
    <configfiles>
        <configfile name="dada2_script"><![CDATA[
@READ_FOO@

library(dada2, quietly=TRUE)
#if $filter.plot == "yes"
library(ggplot2, quietly=TRUE)
#end if

## Collect the selected data sets. A single data set is handed to
## makeSequenceTable as is; several data sets are gathered in a list that is
## named by the element identifier of each data set.
## NOTE(review): read_data is not defined in this file; presumably it is
## provided by the macro token expanded at the top of this script - confirm
## in macros.xml.
samples <- list()
#for $s in $samples:
    #if $len($samples) == 1
    samples <- $read_data($s)
    #else
    samples[["$s.element_identifier"]] <- $read_data($s)
    #end if
#end for
## make sequence table
seqtab <- makeSequenceTable(samples, orderBy = "$orderby")

## get the length of each distinct sequence and the two requested quantiles
## of that (unweighted) length distribution
seqlen <- data.frame(length = nchar(getSequences(seqtab)))
seqlenq <- quantile(seqlen\$length, probs=c( $filter.plower, $filter.pupper  ))

## plot the length distribution of the unfiltered table; the vertical lines
## are placed half a bin outside of the quantiles, i.e. they enclose the
## lengths that are kept by the filter below
#if $filter.plot == "yes"
pdf( '$plot' )
ggplot(seqlen) + 
    geom_histogram( aes(x=length), binwidth=1 ) + 
    geom_vline(xintercept=c(seqlenq[1]-0.5, seqlenq[2]+0.5))
bequiet <- dev.off()
#end if

## filter by seqlengths: keep all sequences whose length lies in the closed
## interval spanned by the two quantiles. An inclusive range test is needed
## here, a set membership test against the two quantile values would only keep
## sequences that have exactly one of these two lengths. drop = FALSE keeps
## the matrix shape if only one sample (row) or one sequence (column) is left.
#if $filter.filter == "yes"
seqtablen <- nchar(colnames(seqtab))
seqtab <- seqtab[, seqtablen >= seqlenq[1] & seqtablen <= seqlenq[2], drop = FALSE]
#end if

## tab separated output: sample names as row names, sequences as column names
write.table(seqtab, "$stable", quote=FALSE, sep="\t", row.names = TRUE, col.names = NA)
    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- one or more data sets that are combined into the sequence table -->
        <param
            name="samples"
            type="data"
            multiple="true"
            format="@DADA_UNIQUES@"
            label="samples" />

        <!-- passed to the orderBy argument of makeSequenceTable -->
        <param
            name="orderby"
            type="select"
            label="Column order">
            <option value="abundance">abundance</option>
            <option value="nsamples">nsamples</option>
        </param>

        <!-- quantiles of the sequence length distribution and switches for plotting / filtering -->
        <section
            name="filter"
            title="Plot and filter sequence lengths">
            <param
                name="plower"
                type="float"
                min="0"
                max="1"
                value="0.01"
                label="lower quantile" />
            <param
                name="pupper"
                type="float"
                min="0"
                max="1"
                value="0.99"
                label="upper quantile" />
            <param
                name="plot"
                type="boolean"
                truevalue="yes"
                falsevalue="no"
                checked="true"
                label="plot sequence length distribution" />
            <param
                name="filter"
                type="boolean"
                truevalue="yes"
                falsevalue="no"
                checked="true"
                label="filter sequence length using quantiles" />
        </section>
    </inputs>
    <outputs>
        <!-- the sequence table written by the script -->
        <data
            name="stable"
            format="dada2_sequencetable"
            label="${tool.name} on ${on_string}"/>

        <!-- length distribution plot, only created if plotting is enabled in the filter section -->
        <data
            name="plot"
            format="pdf"
            label="${tool.name} on ${on_string}: sequence length distribution">
            <filter>filter['plot']</filter>
        </data>
    </outputs>

    <help><![CDATA[
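        Constructs a sequence table (analogous to an OTU table) from the selected samples using the dada2 function makeSequenceTable. The rows of the table are the samples and the columns are the sequences. The columns are ordered by the chosen criterion (abundance or nsamples).

        The lower and upper quantiles of the sequence length distribution are determined from the lengths of the sequences in the table. Optionally, the length distribution is plotted as a histogram (PDF) in which the two quantiles are marked, and the table is filtered by sequence length using these quantiles.

        The sequence table is written as a tab separated table.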
    ]]></help>
    <expand macro="citations"/>
</tool>