view signature.xml @ 0:d613dbee3ce4

Imported from capsule None
author drosofff
date Mon, 03 Nov 2014 10:29:28 -0500
parents
children 9274c7b1e85c
line wrap: on
line source

<tool id="signature" name="Small RNA Signatures" version="2.0.0">
	<description/>
	<!-- External packages the tool depends on; each one is pinned to an exact version. -->
	<requirements>
		<requirement type="package" version="0.12.7">bowtie</requirement>
		<requirement type="package" version="0.1.18">samtools</requirement>
		<requirement type="package" version="0.7.7">pysam</requirement>
		<requirement type="package" version="2.14">biocbasics</requirement>
		<requirement type="package" version="3.0.3">R</requirement>
	</requirements>
	<!-- Command line for signature.py. The element text is a Cheetah template that
	     Galaxy fills in with the parameter values of the job. -->
	<command interpreter="python">
		signature.py
		--input $refGenomeSource.input
		--inputFormat $refGenomeSource.input.ext
		--minquery $minquery
		--maxquery $maxquery
		--mintarget $mintarget
		--maxtarget $maxtarget
		--minscope $minscope
		--maxscope $maxscope
		--outputOverlapDataframe $output
		#if $refGenomeSource.genomeSource == "history":
			## Reference given as a fasta dataset from the history.
			--referenceGenome $refGenomeSource.ownFile
		#else:
			## Built-in index: take the rows of the bowtie_indexes data table whose first
			## column equals the dbkey of the input dataset, and keep the last field of the
			## first match. The input parameter lives inside the refGenomeSource
			## conditional, so it is addressed through it, as on the --input line above.
			#silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
			--referenceGenome $reference
			--extract_index
		#end if
		--graph $graph_type
		## Path of the R script generated from the sigplotter configfile.
		--rcode $sigplotter
	</command>

	<!-- User-facing parameters. The alignment dataset ("input") is declared in both
	     branches of the refGenomeSource conditional; only the built-in-index branch
	     validates its dbkey against the bowtie_indexes data table. -->
	<inputs>
		<conditional name="refGenomeSource">
			<param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
				<option value="indexed">Use a built-in index</option>
				<option value="history">Use one from the history</option>
			</param>
			<when value="indexed">
				<param name="input" type="data" format="tabular,sam,bam" label="Compute signature from this bowtie standard output">
					<validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
				</param>
			</when>
			<when value="history">
				<param name="ownFile" type="data" format="fasta"  label="Select the fasta reference" />
				<param name="input" type="data" format="tabular,sam,bam" label="Compute signature from this bowtie standard output"/>
			</when>
		</conditional>  <!-- refGenomeSource -->
		<!-- Size ranges (in nucleotides) of the query and target small RNA classes. -->
		<param name="minquery" type="integer" size="3" value="23" label="Min size of query small RNAs" help="'23' = 23 nucleotides"/>
		<param name="maxquery" type="integer" size="3" value="29" label="Max size of query small RNAs" help="'29' = 29 nucleotides"/>
		<param name="mintarget" type="integer" size="3" value="23" label="Min size of target small RNAs" help="'23' = 23 nucleotides"/>
		<param name="maxtarget" type="integer" size="3" value="29" label="Max size of target small RNAs" help="'29' = 29 nucleotides"/>
		<!-- Range of overlaps analysed; the help of maxscope now matches its default of 26. -->
		<param name="minscope" type="integer" size="3" value="1" label="Minimal relative overlap analyzed" help="'1' = 1 nucleotide overlap"/>
		<param name="maxscope" type="integer" size="3" value="26" label="Maximal relative overlap analyzed" help="'26' = 26 nucleotides overlap"/>
		<param name="graph_type" type="select" label="Graph type" help="Signature can be computed globally or by item present in the alignment file">
			<option value="global" selected="True">Global</option>
			<option value="lattice">Lattice</option>
		</param>
	</inputs>

  <!-- Cheetah-templated R script. Its path is passed to signature.py through the
       rcode option of the command line; presumably signature.py runs it once the
       overlap data frame has been written (confirm in signature.py). -->
  <configfiles>
    <configfile name="sigplotter">
      ## Lines starting with a double hash are Cheetah comments and never reach R.
      ## Avoid the dollar sign and unescaped XML special characters in the R code below.
      graph_type = "${graph_type}"

      ## Route R errors to stderr and quit with exit status 1.
      report_errors_on_stderr = function () {
        options( show.error.messages=F,
                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
      }

      ## Global mode: a single page with four panels (pair counts, their z-scores,
      ## overlap probabilities in percent, and the z-scores of these probabilities).
      ## Column 1 of the data frame is used as the overlap, column 2 as the number
      ## of pairs and column 3 as the overlap probability.
      globalgraph = function () {
        report_errors_on_stderr()
        signature = read.delim("${output}", header=TRUE)
        overlap = signature[,1]
        pair_counts = signature[,2]
        overlap_prob = signature[,3]
        signaturez = (pair_counts - mean(pair_counts))/sd(pair_counts)
        overlap_prob_z = (overlap_prob - mean(overlap_prob))/sd(overlap_prob)
        YLIM = max(pair_counts)
        ## Ticks every 3 nt, derived from the overlap values themselves so that the
        ## labels stay correct when the analysed scope does not start at 1.
        overlap_ticks = seq(from=min(overlap), to=max(overlap), by=3)

        ## Open output2 PDF file
        pdf( "${output2}" )
        par(mfrow=c(2,2),oma = c(0, 0, 3, 0))

        plot(overlap, pair_counts, type = "h", main="Numbers of pairs", cex.main=1, xlab="overlap (nt)", ylim=c(0,YLIM), ylab="Numbers of pairs", col="darkslateblue", lwd=4)

        ## z-scores are plotted against the overlap column rather than the row index.
        plot(overlap, signaturez, type = "l", main="Number of pairs Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2, cex.lab=1, cex.axis=1, xaxt="n")
        axis(1, at=overlap_ticks )

        plot(overlap, overlap_prob*100, type = "l", main="Overlap probabilities",
             cex.main=1, xlab="overlap (nt)", ylab="Probability [%]", ylim=c(0,50),
             pch=19, col="darkslateblue", lwd=2, cex.lab=1, cex.axis=1, xaxt="n")
        axis(1, at=overlap_ticks )

        plot(overlap, overlap_prob_z, type = "l", main="Overlap Probability Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2, cex.lab=1, cex.axis=1, xaxt="n")
        axis(1, at=overlap_ticks )

        mtext("Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs", outer = TRUE, cex=1)
        ## Close the PDF file
        devname = dev.off()
      }

      ## Lattice mode: one panel per level of column 4 of the data frame
      ## (presumably the reference item of the alignment; confirm in signature.py).
      treillisgraph = function () {
        ## Install the error handler before anything that can fail.
        report_errors_on_stderr()
        library(lattice)
        signature = read.delim("${output}", header=TRUE)
        ## Open output2 PDF file (A4 portrait dimensions, in inches)
        pdf( "${output2}", paper="special", height=11.69, width=8.2677 )
        ## The x range follows the minscope and maxscope parameters (defaults 1 and 26).
        print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(${minscope},${maxscope}), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",
             par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),
             cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",
             pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,
             layout=c(4,12), as.table=TRUE, newpage = T) )
        devname = dev.off()
      }

      ## Dispatch on the graph type selected in the tool form.
      if (graph_type=="global") {
        globalgraph()
      }
      if (graph_type=="lattice") {
        treillisgraph()
      }
    </configfile>
  </configfiles>

        <!-- Datasets produced by a job: "output" is the tabular file handed to
             signature.py through the outputOverlapDataframe option, and "output2"
             is the PDF opened by the sigplotter R script. -->
        <outputs>
            <data name="output" format="tabular" label="signature data frame"/>
            <data name="output2" format="pdf" label="Overlap probabilities"/>
        </outputs>

        <help>

**What it does**

This tool computes the number of pairs by overlap classes (in nt) from a bowtie output file, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al. (2009) Science.
The numerical options set the min and max size of both the query small RNA class and the target small RNA class.
Three types of signals are plotted in the output PDF file: the number of pairs found, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al. (2009) Science.

        </help>

  <!-- Functional test: global signature computed against a fasta reference taken
       from the history. The output names must match the data elements declared
       in the outputs section ("output" and "output2"). -->
  <tests>
    <test>
      <param name="genomeSource" value="history" />
      <param name="ownFile" value="transposons.fasta" ftype="fasta" />
      <param name="input" value="sr_bowtie.output" ftype="tabular" />
      <param name="minquery" value="23" />
      <param name="maxquery" value="29" />
      <param name="mintarget" value="23" />
      <param name="maxtarget" value="29" />
      <param name="minscope" value="1" />
      <param name="maxscope" value="26" />
      <param name="graph_type" value="global" />
      <output name="output" ftype="tabular" file="signature_data_frame.tab"/>
      <output name="output2" ftype="pdf" file="Overlap_probabilities.pdf"/>
    </test>
  </tests>



</tool>