view signature.xml @ 2:2b30861d95f4 draft

Uploaded
author mvdbeek
date Sun, 29 Mar 2015 11:58:31 -0400
parents 9274c7b1e85c
children 741cc1d16813
line wrap: on
line source

<tool id="signature" name="Small RNA Signatures" version="2.0.1">
    <description />
    <requirements>
        <requirement type="package" version="0.12.7">bowtie</requirement>
        <requirement type="package" version="0.1.18">samtools</requirement>
        <requirement type="package" version="0.7.7">pysam</requirement>
        <requirement type="package" version="2.14">biocbasics</requirement>
        <requirement type="package" version="3.0.3">R</requirement>
        <requirement type="package" version="1.9">numpy</requirement>
        <requirement type="package" version="0.14">scipy</requirement>
    </requirements>
    <command interpreter="python">
           signature.py
           --input $refGenomeSource.input
           --inputFormat $refGenomeSource.input.ext
           --minquery $minquery
           --maxquery $maxquery
           --mintarget $mintarget
           --maxtarget $maxtarget
           --minscope $minscope
           --maxscope $maxscope
           --outputOverlapDataframe $output
          #if $refGenomeSource.genomeSource == "history":
            --referenceGenome $refGenomeSource.ownFile
          #else:
            #silent reference= filter( lambda x: str( x[0] ) == str( $input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
            --referenceGenome $reference
            --extract_index
          #end if
	  --graph $graph_type 
          --rcode $sigplotter
       </command>
    <inputs>
        <conditional name="refGenomeSource">
            <param help="Built-ins were indexed using default options" label="Will you select a reference genome from your history or use a built-in index?" name="genomeSource" type="select">
                <option value="indexed">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="indexed">
                <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data">
                    <validator message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history." metadata_column="0" metadata_name="dbkey" table_name="bowtie_indexes" type="dataset_metadata_in_data_table" />
                </param>
            </when>
            <when value="history">
                <param format="fasta" label="Select the fasta reference" name="ownFile" type="data" />
                <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data" />
            </when>
        </conditional>
        <param help="'23' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />
        <param help="'29' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />
        <param help="'23' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />
        <param help="'29' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />
        <param help="'1' = 1 nucleotide overlap" label="Minimal relative overlap analyzed" name="minscope" size="3" type="integer" value="1" />
        <param help="'1' = 1 nucleotide overlap" label="Maximal relative overlap analyzed" name="maxscope" size="3" type="integer" value="26" />
        <param help="Signature can be computed globally or by item present in the alignment file" label="Graph type" name="graph_type" type="select">
            <option selected="True" value="global">Global</option>
            <option value="lattice">Lattice</option>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" label="signature data frame" name="output" />
        <data format="pdf" label="Overlap probabilities" name="output2" />
    </outputs>
    <tests>
        <test>
            <param name="genomeSource" value="history" />
            <param ftype="fasta" name="ownFile" value="ensembl.fa" />
            <param ftype="bam" name="input" value="sr_bowtie.bam" />
            <param name="minquery" value="23" />
            <param name="maxquery" value="29" />
            <param name="mintarget" value="23" />
            <param name="maxtarget" value="29" />
            <param name="minscope" value="5" />
            <param name="maxscope" value="15" />
            <param name="graph_type" value="global" />
            <output file="signature.tab" ftype="tabular" name="output" />
            <output file="signature.pdf" ftype="pdf" name="output2" />
        </test>
    </tests>
    <help>

**What it does**

This tool computes the number of pairs by overlap classes (in nt) from a bowtie output file, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.   
The numerical options set the min and max size of both the query small rna class and the target small rna class.   
Three type of signals are plotted in separate pdf files, the number of pairs founds, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.   

        </help>
    <citations>
            <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>
    </citations>
    <configfiles>
        <configfile name="sigplotter">
      graph_type = "${graph_type}"

      globalgraph = function () {
        ## Setup R error handling to go to stderr
        options( show.error.messages=F,
                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
        signature = read.delim("${output}", header=TRUE)
        signaturez=data.frame(signature[,1], (signature[,2] -mean(signature[,2]))/sd(signature[,2]))
        overlap_prob_z=data.frame(signature[,1], (signature[,3] -mean(signature[,3]))/sd(signature[,3]))
        YLIM=max(signature[,2])

        ## Open output2 PDF file
        pdf( "${output2}" )
        par(mfrow=c(2,2),oma = c(0, 0, 3, 0))

        plot(signature[,1:2], type = "h", main="Numbers of pairs", cex.main=1, xlab="overlap (nt)", ylim=c(0,YLIM), ylab="Numbers of pairs", col="darkslateblue", lwd=4)

        plot(signaturez, type = "l", main="Number of pairs Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)

        plot(signature[,1], signature[,3]*100, type = "l", main="Overlap probabilities",
             cex.main=1, xlab="overlap (nt)", ylab="Probability [%]", ylim=c(0,50),
             pch=19, col="darkslateblue", lwd=2)

        plot(overlap_prob_z, type = "l", main="Overlap Probability Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)

        mtext("Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs", outer = TRUE, cex=1)
        devname = dev.off()
        ## Close the PDF file
      }

      treillisgraph = function () {
        ## Open output2 PDF file
        pdf( "${output2}", paper="special", height=11.69, width=8.2677 )
        signature = read.delim("${output}", header=TRUE)
        options( show.error.messages=F,
               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
        library(lattice)
        print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(${minscope},${maxscope}), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",
             par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),
             cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",
             pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,
             layout=c(4,12), as.table=TRUE, newpage = T) )
        devnname = dev.off()
      }

      if (graph_type=="global") {
        globalgraph()

      }
      if(graph_type=="lattice") {
        treillisgraph()
      }
    </configfile>
    </configfiles>
</tool>