comparison readmap.xml @ 0:de6a6afc5a79 draft default tip

Uploaded
author drosofff
date Tue, 24 Jun 2014 12:16:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:de6a6afc5a79
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="0.9.2">
2 <description>from sRbowtie alignment</description>
3 <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
4 <parallelism method="basic"></parallelism>
5 <command interpreter="python">
6 readmap.py ## wrapper script run with the python interpreter; every following line is one of its arguments
7 #if $refGenomeSource.genomeSource == "history":
8 --reference_fasta ## sys.argv[2]
9 $refGenomeSource.ownFile ## index source
10 #else:
11 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
12 --reference_bowtie_index ## built-in case: last field of the bowtie_indexes row whose first field equals the dbkey of the first alignment
13 $reference
14 #end if
15 --rcode ## refers to the plotCode configfile declared under configfiles (the R plotting script)
16 $plotCode
17 --output_readmap ## tabular output; the R script reads this same file back to draw the readmaps
18 $readmap_dataframe
19 --output_size_distribution ## tabular output; the R script reads this same file back to draw the size distributions
20 $size_distribution_dataframe
21 --minquery
22 $minquery
23 --maxquery
24 $maxquery
25 --input ## the four loops below each emit one value per entry of the series repeat, in the same order
26 #for $i in $refGenomeSource.series
27 $i.input
28 #end for
29 --ext ## ext attribute of each alignment dataset
30 #for $i in $refGenomeSource.series
31 $i.input.ext
32 #end for
33 --label ## name of each alignment dataset, wrapped in double quotes
34 #for $i in $refGenomeSource.series
35 "$i.input.name"
36 #end for
37 --normalization_factor ## norm value entered next to each alignment
38 #for $i in $refGenomeSource.series
39 $i.norm
40 #end for
41 #if $gff:
42 --gff ## only emitted when the gff parameter evaluates true in the condition above
43 $gff
44 #end if
45
46 </command>
47 <inputs>
48 <conditional name="refGenomeSource"> <!-- reference choice: a built-in bowtie index or a fasta from the history; both branches expose the same series repeat (input + norm) -->
49 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
50 <option value="indexed">Use a built-in index</option>
51 <option value="history">Use one from the history</option>
52 </param>
53 <when value="indexed">
54 <repeat name="series" title="Add alignment files">
55 <param name="input" type="data" label="Select multiple alignments to parse">
56 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
57 </param>
58 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
59 </repeat>
60 </when>
61 <when value="history"> <!-- ownFile is read by the command section as the value of its reference_fasta option --> <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference"/>
62 <repeat name="series" title="Add alignment files">
63 <param name="input" type="data" label="Select multiple alignments to parse"/>
64 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> <!-- float, matching the indexed branch -->
65 </repeat>
66 </when>
67 </conditional>
68 <param name="gff" type="data" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/> <!-- the command section adds its gff option only when this is set -->
69 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
70 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/> <!-- passed to readmap.py as minquery -->
71 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/> <!-- passed to readmap.py as maxquery -->
72 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/> <!-- used by the R script as the main title of the combined PDF pages -->
73 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/> <!-- used by the R script as the bottom caption (sub) of the combined PDF pages -->
74 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> <!-- used by the R script as the left-hand label of the pages in all three PDFs -->
75 <param name="rows_per_page" type="integer" size="9" value="8" label="How many items to display per page?"> <!-- integer: the R script uses this value as a grid row count and as the step of its paging loops -->
76 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
77 </param>
78 </inputs>
79 <configfiles>
80 <configfile name="plotCode">
81 ## Setup R error handling to go to stderr
82 options( show.error.messages=F,
83 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
84 library(RColorBrewer)
85 library(lattice)
86 library(latticeExtra)
87 library(grid)
88 library(gridExtra)
89 ##cheetahtemplate data frame implementation
90 ## Read the readmap table (the file handed to readmap.py as its output_readmap target) and open the readmap PDF device
91 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL)
92 pdf(file="${readmap_PDF}", paper="special", height=11.69, width=8.2677)
93 n_samples=length(unique(rm\$sample))
94 ## factor() first, so the gene list is the same whether read.delim returned the gene column as factor or as character
95 genes=levels(factor(rm\$gene))
96 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x))
97 n_genes=length(per_gene_readmap)
98
99 ## Lattice settings for each kind of page; the text size is 96 divided by rows_per_page, so it shrinks as more rows share a page
100 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
101 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
102 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
103 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), fontsize = list(text=96/${rows_per_page}, points=8))
104
105
106 plot_readmap=function(df, ...) {
107 ## Readmap of one gene: vertical segments (type h) of count against coord, one panel per sample,
108 ## grouped by polarity in red and blue; extra arguments are forwarded to xyplot
109 panel.layout=count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x)))
110 x.axis=list(rot=0, cex=0.75, axs="i", tck=0.5)
111 y.axis=list(tick.number=4, rot=90, cex=0.75)
112 readmap.trellis=xyplot(panel.layout, data=df, type='h',
113 scales=list(relation="free", x=x.axis, y=y.axis),
114 xlab=NULL, main=NULL, ylab=NULL,
115 as.table=T, origin = 0, horizontal=FALSE,
116 group=polarity, col=c("red","blue"), ...)
117 combineLimits(readmap.trellis)
118 }
119
120 plot_size_distribution= function(df, ...) {
121 ## Stacked barchart of count per read size for one gene, one panel per sample, bars grouped by polarity
122 ## in red and blue; extra arguments are forwarded to barchart
123 symmetric.ylim=function(x,y,...) {
124 peak=max(abs(y))
125 list(ylim=c(-peak, peak))
126 }
127 size.formula=count~as.factor(size)|factor(sample, levels=unique(sample))+gene
128 size.scales=list(y=list(tick.number=4, rot=90, relation="free"), cex=0.75)
129 size.trellis=barchart(size.formula, data = df, origin = 0, horizontal=FALSE,
130 group=polarity, stack=TRUE, col=c('red', 'blue'), cex=0.75,
131 scales=size.scales, prepanel=symmetric.ylim,
132 xlab = NULL, ylab = NULL, main = NULL,
133 as.table=TRUE, newpage = T, ...)
134 combineLimits(size.trellis)
135 }
136
137 ## One PDF page per chunk of rows_per_page genes; each gene is drawn by plot_readmap and stacked in a single column
138 for (first in seq(1,n_genes,${rows_per_page})) {
139 last=min(first+${rows_per_page}-1, n_genes)
140 page.plots=lapply(per_gene_readmap[first:last], plot_readmap, par.settings=par.settings.readmap)
141 page.layout=list(nrow=${rows_per_page}, ncol=1, main="readmaps", left="${ylabel}", sub="readmap coordinate")
142 do.call(grid.arrange, c(page.plots, page.layout))
143 }
144
145 ## Close the readmap PDF device
146 devname=dev.off()
147
148 ## Load the size distribution table and split it per gene, following the order of genes
149 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
150 per_gene_size=lapply(genes, function(x) subset(size, gene==x))
151 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
152 ## One PDF page per chunk of rows_per_page genes, stacked in a single column
153 for (first in seq(1,n_genes,${rows_per_page})) {
154 last=min(first+${rows_per_page}-1, n_genes)
155 page.plots=lapply(per_gene_size[first:last], plot_size_distribution, par.settings=par.settings.size)
156 page.layout=list(nrow=${rows_per_page}, ncol=1, main="size distribution", left="${ylabel}", sub="readsize in nucleotides")
157 do.call(grid.arrange, c(page.plots, page.layout))
158 }
159
160 ## Close the size distribution PDF device
161
162 devname=dev.off()
163
164 pdf(file="${combi_PDF}", paper="special", height=11.69, width=8.2677)
165 ## Each gene takes two rows here (readmap above its size distribution), so a page holds half of rows_per_page genes; integer division keeps the step whole when rows_per_page is odd, so no gene is skipped
166 for (i in seq(1,n_genes,${rows_per_page}%/%2)) {
167 start=i
168 end=min(i+${rows_per_page}%/%2-1, n_genes)
169 ## rbind of the two lists followed by c() below interleaves them: readmap 1, size 1, readmap 2, size 2, and so on
170 read_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.combination.readmap))
171 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
172 plot.list=rbind(read_plot.list, size_plot.list )
173 args.list=c(plot.list, list(nrow=${rows_per_page}, ncol=1, main="${title}", left="${ylabel}", sub="${xlabel}"))
174 do.call(grid.arrange, args.list)
175 }
176
177 devname=dev.off()
178
179
180 </configfile>
181 </configfiles>
182
183 <outputs> <!-- the two tabular outputs are named on the readmap.py command line; the three PDFs are referenced only as pdf() targets in the plotCode R script -->
184 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/> <!-- given to the output_readmap option, then read back by the R script -->
185 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/> <!-- given to the output_size_distribution option, then read back by the R script -->
186 <data format="pdf" name="readmap_PDF" label="Readmaps"/> <!-- first PDF device opened by the R script -->
187 <data format="pdf" name="size_PDF" label="Size distribution"/> <!-- second PDF device opened by the R script -->
188 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/> <!-- third PDF device: readmaps and size distributions on the same pages -->
189 </outputs>
190 <help>
191
192 **What it does**
193
194 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap",
195 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates
196 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.
197
198
199 .. class:: warningmark
200
201 **TIP** The input data can be produced using the sRbowtie tool.
202
203 ----
204
205 **Example**
206
207 Query sequence::
208 For a SAM file such as the following:
209
210 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
211
212 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
213
214 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
215
216 produce a plot like this:
217
218 ----
219
220 .. image:: static/images/readmap.png
221 :height: 800
222 :width: 500
223
224 </help>
225 </tool>