annotate signature.xml @ 12:a1defb9a7385 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08-dirty
author drosofff
date Wed, 07 Jun 2017 18:07:47 -0400
parents b1a15b5a3f1b
children 7b1f4bc21749
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
a1defb9a7385 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08-dirty
drosofff
parents: 11
diff changeset
1 <tool id="signature" name="Small RNA Signatures" version="2.1.1">
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
2 <description />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
3 <requirements>
11
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
4 <requirement type="package" version="1.1.2">bowtie</requirement>
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
5 <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
6 <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
7 <!-- <requirement type="package" version="3.1.2">R</requirement> -->
12
a1defb9a7385 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08-dirty
drosofff
parents: 11
diff changeset
8 <requirement type="package" version="0.20_33=r3.3.1_0">r-lattice</requirement>
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
9 </requirements>
11
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
10 <command><![CDATA[
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
11 python '$__tool_directory__'/signature.py
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
12 --input '$refGenomeSource.input'
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
13 --inputFormat '$refGenomeSource.input.ext'
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
14 --minquery $minquery
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
15 --maxquery $maxquery
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
16 --mintarget $mintarget
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
17 --maxtarget $maxtarget
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
18 --minscope $minscope
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
19 --maxscope $maxscope
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
20 --outputOverlapDataframe $output
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
21 #if $refGenomeSource.genomeSource == "history":
11
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
22 --referenceGenome '$refGenomeSource.ownFile'
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
23 #else:
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
24 #silent reference= filter( lambda x: str( x[0] ) == str( $input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
11
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
25 --referenceGenome '$reference'
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
26 --extract_index
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
27 #end if
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
28 --graph $graph_type
11
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
29 --rcode '$sigplotter'
b1a15b5a3f1b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents: 10
diff changeset
30 ]]></command>
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
31 <inputs>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
32 <conditional name="refGenomeSource">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
33 <param help="Built-ins were indexed using default options" label="Will you select a reference genome from your history or use a built-in index?" name="genomeSource" type="select">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
34 <option value="indexed">Use a built-in index</option>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
35 <option value="history">Use one from the history</option>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
36 </param>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
37 <when value="indexed">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
38 <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
39 <validator message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history." metadata_column="0" metadata_name="dbkey" table_name="bowtie_indexes" type="dataset_metadata_in_data_table" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
40 </param>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
41 </when>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
42 <when value="history">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
43 <param format="fasta" label="Select the fasta reference" name="ownFile" type="data" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
44 <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
45 </when>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
46 </conditional>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
47 <param help="'23' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
48 <param help="'29' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
49 <param help="'23' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
50 <param help="'29' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
51 <param help="'1' = 1 nucleotide overlap" label="Minimal relative overlap analyzed" name="minscope" size="3" type="integer" value="1" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
52 <param help="'1' = 1 nucleotide overlap" label="Maximal relative overlap analyzed" name="maxscope" size="3" type="integer" value="26" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
53 <param help="Signature can be computed globally or by item present in the alignment file" label="Graph type" name="graph_type" type="select">
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
54 <option selected="True" value="global">Global</option>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
55 <option value="lattice">Lattice</option>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
56 </param>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
57 </inputs>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
58 <outputs>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
59 <data format="tabular" label="signature data frame" name="output" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
60 <data format="pdf" label="Overlap probabilities" name="output2" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
61 </outputs>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
62 <tests>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
63 <test>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
64 <param name="genomeSource" value="history" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
65 <param ftype="fasta" name="ownFile" value="ensembl.fa" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
66 <param ftype="bam" name="input" value="sr_bowtie.bam" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
67 <param name="minquery" value="23" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
68 <param name="maxquery" value="29" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
69 <param name="mintarget" value="23" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
70 <param name="maxtarget" value="29" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
71 <param name="minscope" value="5" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
72 <param name="maxscope" value="15" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
73 <param name="graph_type" value="global" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
74 <output file="signature.tab" ftype="tabular" name="output" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
75 <output file="signature.pdf" ftype="pdf" name="output2" />
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
76 </test>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
77 </tests>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
78 <help>
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
79
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
80 **What it does**
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
81
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
82 This tool computes the number of pairs by overlap classes (in nt) from a bowtie output file, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
83 The numerical options set the min and max size of both the query small RNA class and the target small RNA class.
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
84 Three types of signals are plotted in separate pdf files: the number of pairs found, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
85
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
86 </help>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
87 <citations>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
88 <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
89 </citations>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
90 <configfiles>
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
91 <configfile name="sigplotter">
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
92 graph_type = "${graph_type}"
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
93
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
94 globalgraph = function () {
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
95 ## Setup R error handling to go to stderr
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
96 options( show.error.messages=F,
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
97 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
98 signature = read.delim("${output}", header=TRUE)
1
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
99 signaturez=data.frame(signature[,1], (signature[,2] -mean(signature[,2]))/sd(signature[,2]))
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
100 overlap_prob_z=data.frame(signature[,1], (signature[,3] -mean(signature[,3]))/sd(signature[,3]))
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
101 YLIM=max(signature[,2])
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
102
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
103
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
104 ## Open output2 PDF file
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
105 pdf( "${output2}" )
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
106 if (YLIM!=0) {
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
107 par(mfrow=c(2,2),oma = c(0, 0, 3, 0))
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
108
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
109 plot(signature[,1:2], type = "h", main="Numbers of pairs", cex.main=1, xlab="overlap (nt)", ylim=c(0,YLIM), ylab="Numbers of pairs", col="darkslateblue", lwd=4)
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
110
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
111 plot(signaturez, type = "l", main="Number of pairs Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
112
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
113 plot(signature[,1], signature[,3]*100, type = "l", main="Overlap probabilities",
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
114 cex.main=1, xlab="overlap (nt)", ylab="Probability [%]", ylim=c(0,50),
1
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
115 pch=19, col="darkslateblue", lwd=2)
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
116
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
117 plot(overlap_prob_z, type = "l", main="Overlap Probability Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)
1
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
118
4
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
119 mtext("Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs", outer = TRUE, cex=1)
741cc1d16813 Uploaded
mvdbeek
parents: 2
diff changeset
120 }
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
121 devname = dev.off()
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
122 ## Close the PDF file
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
123 }
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
124
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
125 treillisgraph = function () {
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
126 ## Open output2 PDF file
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
127 pdf( "${output2}", paper="special", height=11.69, width=8.2677 )
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
128 signature = read.delim("${output}", header=TRUE)
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
129 options( show.error.messages=F,
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
130 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
131 library(lattice)
1
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
132 print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(${minscope},${maxscope}), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
133 par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
134 cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
135 pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
136 layout=c(4,12), as.table=TRUE, newpage = T) )
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
137 devnname = dev.off()
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
138 }
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
139
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
140 if (graph_type=="global") {
1
9274c7b1e85c Fixed issue: now the plot properly reflects a subset of analysed overlaps, i.e 5 to 15 nucleotides of overlap.
chris <drosofff@gmail.com>
parents: 0
diff changeset
141 globalgraph()
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
142
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
143 }
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
144 if(graph_type=="lattice") {
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
145 treillisgraph()
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
146 }
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
147 </configfile>
2
2b30861d95f4 Uploaded
mvdbeek
parents: 1
diff changeset
148 </configfiles>
0
d613dbee3ce4 Imported from capsule None
drosofff
parents:
diff changeset
149 </tool>