comparison readmap.xml @ 0:9af9983dcd02 draft

Imported from capsule None
author drosofff
date Mon, 03 Nov 2014 10:28:40 -0500
parents
children eee9701a7491
comparison
equal deleted inserted replaced
-1:000000000000 0:9af9983dcd02
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.0.0">
2 <description>from sRbowtie alignment</description>
3 <requirements> <!-- Package dependencies declared for this tool, each pinned to an exact version -->
4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.1.18">samtools</requirement>
6 <requirement type="package" version="0.7.7">pysam</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement>
8 <requirement type="package" version="3.0.3">R</requirement>
9 </requirements>
10 <parallelism method="basic"></parallelism>
11 <command interpreter="python">
12 readmap.py ## Cheetah template of the readmap.py command line; each option is followed by its value(s) on the next line(s)
13 #if $refGenomeSource.genomeSource == "history":
14 --reference_fasta ## sys.argv[2]
15 $refGenomeSource.ownFile ## index source
16 #else:
17 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
18 --reference_bowtie_index ## reference = last field of the first bowtie_indexes row whose first field equals the dbkey of the first input
19 $reference
20 #end if
21 --rcode ## path of the R script rendered from the plotCode configfile
22 $plotCode
23 --output_readmap
24 $readmap_dataframe
25 --output_size_distribution
26 $size_distribution_dataframe
27 --minquery
28 $minquery
29 --maxquery
30 $maxquery
31 --input ## one value per entry of the series repeat; --ext, --label and --normalization_factor list their values in the same order
32 #for $i in $refGenomeSource.series
33 $i.input
34 #end for
35 --ext
36 #for $i in $refGenomeSource.series
37 $i.input.ext
38 #end for
39 --label
40 #for $i in $refGenomeSource.series
41 "$i.input.name"
42 #end for
43 --normalization_factor
44 #for $i in $refGenomeSource.series
45 $i.norm
46 #end for
47 #if $gff:
48 --gff ## only emitted when the optional gff dataset was selected
49 $gff
50 #end if
51
52 </command>
53 <inputs>
54 <conditional name="refGenomeSource"> <!-- Both branches define a repeat named series, which the command template iterates over -->
55 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
56 <option value="indexed">Use a built-in index</option>
57 <option value="history">Use one from the history</option>
58 </param>
59 <when value="indexed">
60 <repeat name="series" title="Add alignment files">
61 <param name="input" type="data" label="Select multiple alignments to parse">
62 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
63 </param>
64 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
65 </repeat>
66 </when>
67 <when value="history">
68 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" />
69 <repeat name="series" title="Add alignment files">
70 <param name="input" type="data" label="Select multiple alignments to parse"/>
71 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
72 </repeat>
73 </when>
74 </conditional>
75 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
76 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
77 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/>
78 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/>
79 <param name="title" type="text" size="15" value="Readmaps and size distributions" label="Main Titles"/>
80 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/>
81 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
82 <param name="rows_per_page" type="integer" size="9" value="8" label="How many items to display per page?"> <!-- integer rather than free text: the value is substituted verbatim into the R script and range-checked below -->
83 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
84 </param>
85 </inputs>
86 <configfiles>
87 <configfile name="plotCode">
88 ## Setup R error handling: on error, write the message to stderr and quit with status 1
89 options( show.error.messages=F,
90 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
91 library(RColorBrewer)
92 library(lattice)
93 library(latticeExtra)
94 library(grid)
95 library(gridExtra)
96
97 ## data frames implementation: load both tables and split each of them per gene
98
99 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL) ## same path that the command line passes after --output_readmap
100 n_samples=length(unique(rm\$sample))
101 genes=unique(levels(rm\$gene)) ## NOTE(review): assumes the gene column is read as a factor (levels() of a character column is NULL) - confirm for the R version in use
102 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ## list with one readmap subset per gene, in the order of genes
103 n_genes=length(per_gene_readmap)
104
105 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL) ## same path that the command line passes after --output_size_distribution
106 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ## list with one size subset per gene, index-aligned with per_gene_readmap
107
108 ## end of data frames implementation
109
110 ## functions
111
112 plot_readmap=function(df, ...) { ## returns a lattice xyplot of count against coord for the rows of df, wrapped in combineLimits; extra arguments are forwarded to xyplot
113 combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))), ## panels: samples in order of first appearance, genes by decreasing sum of absolute counts
114 data=df,
115 type='h',
116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
117 xlab=NULL, main=NULL, ylab=NULL, ## no per-plot titles; the callers add shared labels through grid.arrange
118 as.table=T,
119 origin = 0,
120 horizontal=FALSE,
121 group=polarity, ## the two polarity groups are drawn in red and blue
122 col=c("red","blue"),
123 par.strip.text = list(cex=0.7),
124 ...))
125 }
126
127 plot_size_distribution= function(df, ...) { ## returns a stacked lattice barchart of count per read size for the rows of df, wrapped in combineLimits; extra arguments are forwarded to barchart
128 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} ## prepanel giving each panel y limits symmetric around zero
129 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0, ## panels: samples in order of first appearance, then gene
130 horizontal=FALSE,
131 group=polarity, ## the two polarity groups are stacked and drawn in red and blue
132 stack=TRUE,
133 col=c('red', 'blue'),
134 cex=0.75,
135 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
136 prepanel=smR.prepanel,
137 xlab = NULL,
138 ylab = NULL,
139 main = NULL,
140 as.table=TRUE,
141 newpage = T,
142 par.strip.text = list(cex=0.7),
143 ...)
144 combineLimits(bc)
145 }
146
147 ## end of functions
148
149 ## function parameters: par.settings lists (layout paddings and strip background colours) handed to the plot functions
150
151 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) ## readmap-only PDF
152 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) ## size-distribution-only PDF
153 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) ) ## readmap rows of the combined PDF
154 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) ) ## size rows of the combined PDF
155
156 ## end of function parameters
157
158 ## GRAPHS
159
160 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else { ## more than 7 genes: fixed page height (11.69, presumably A4 height in inches) and the user-chosen rows per page
161 rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 } ## otherwise all genes on one page. NOTE(review): both heights are divided by n_genes, so the page gets shorter as genes are added - confirm this is intended
162 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test
163
164 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width) ## readmap PDF: one grid.arrange page per batch of rows_per_page genes
165 for (i in seq(1,n_genes,rows_per_page)) {
166 start=i
167 end=i+rows_per_page-1
168 if (end>n_genes) {end=n_genes} ## the last page may hold fewer genes
169 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap))
170 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1, ## the plots followed by the layout and label arguments given to grid.arrange
171 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"),
172 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
173 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom")
174 )
175 )
176 do.call(grid.arrange, args.list)
177 }
178 devname=dev.off() ## closes the PDF device opened above
179
180
181 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width) ## size distribution PDF: one grid.arrange page per batch of rows_per_page genes
182 for (i in seq(1,n_genes,rows_per_page)) {
183 start=i
184 end=i+rows_per_page-1
185 if (end>n_genes) {end=n_genes} ## the last page may hold fewer genes
186 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) )
187 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1, ## the plots followed by the layout and label arguments given to grid.arrange
188 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"),
189 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
190 #sub="readsize in nucleotides"
191 )
192 )
193 do.call(grid.arrange, args.list)
194 }
195 devname=dev.off() ## closes the PDF device opened above
196
197 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width) ## combined PDF: every gene contributes a readmap row directly followed by its size distribution row
198 for (i in seq(1,n_genes,max(1,floor(rows_per_page/2)))) { ## whole number of genes per page (two rows each); a fractional step for odd rows_per_page made start:end skip genes
199 start=i
200 end=i+max(1,floor(rows_per_page/2))-1
201 if (end>n_genes) {end=n_genes} ## the last page may hold fewer genes
202 read_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.combination.readmap))
203 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
204 plot.list=rbind(read_plot.list, size_plot.list ) ## two-row matrix; c() below flattens it column by column, giving readmap then size for each gene
205 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1,
206 main=textGrob("${title}", gp=gpar(cex=1), just="top"),
207 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90),
208 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom")
209 )
210 )
211 do.call(grid.arrange, args.list)
212 }
213 devname=dev.off() ## closes the PDF device opened above
214
215
216 </configfile>
217 </configfiles>
218
219 <outputs> <!-- Two tabular files whose paths are passed to readmap.py, and three PDFs opened by the plotCode R script -->
220 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
221 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>
222 <data format="pdf" name="readmap_PDF" label="Readmaps"/>
223 <data format="pdf" name="size_PDF" label="Size distribution"/>
224 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/>
225 </outputs>
226 <help>
227
228 **What it does**
229
230 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap",
231 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates
232 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.
233
234
235 .. class:: warningmark
236
237 **TIP** The input data can be produced using the sRbowtie tool.
238
239 ----
240
241 **Example**
242
243 Query sequence::
244 For a SAM file as the following:
245
246 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
247
248 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
249
250 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
251
252 produce a plot like this:
253
254 ----
255
256 .. image:: static/images/readmap.png
257 :height: 800
258 :width: 500
259
260 </help>
261
262 <tests><test> <!-- test cases have to be nested in a tests container to be picked up; this one uses a reference fasta from the history -->
263 <param name="genomeSource" value="history" />
264 <param name="ownFile" value="transposons.fasta" ftype="fasta" />
265 <param name="input" value="sample1.srbowtie_out, sample2.srbowtie_out, sample3.srbowtie_out" ftype="tabular" />
266 <param name="norm" value="1,1,1" />
267 <param name="minquery" value="20" />
268 <param name="maxquery" value="30" />
269 <param name="title" value="Readmaps and size distributions" />
270 <param name="xlabel" value="Coordinates/read size" />
271 <param name="ylabel" value="Number of reads" />
272 <param name="rows_per_page" value="8" />
273 <output name="readmap_dataframe" ftype="tabular" value="Readmap_dataframe.tab" />
274 <output name="size_distribution_dataframe" ftype="tabular" value="Size_distribution_dataframe.tab" />
275 <output name="readmap_PDF" ftype="pdf" value="Readmaps.pdf" />
276 <output name="size_PDF" ftype="pdf" value="Size_distribution.pdf" />
277 <output name="combi_PDF" ftype="pdf" value="Size_distribution_and_Readmaps.pdf" />
278 </test></tests>
279
280 </tool>