comparison readmap.xml @ 12:2fc0d4756048 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit e27d18d58ae095e7fad4b08b04370857a1d37964-dirty
author mvdbeek
date Tue, 02 Feb 2016 14:53:10 -0500
parents edd57052f0bb
children 355940295e76
comparison
equal deleted inserted replaced
11:edd57052f0bb 12:2fc0d4756048
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.1.0"> 1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.1.1">
2 <description>from sRbowtie aligment</description> 2 <description>from sRbowtie aligment</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement> 4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.7.7">pysam</requirement> 5 <requirement type="package" version="0.7.7">pysam</requirement>
6 <requirement type="package" version="3.1.2">R</requirement> 6 <requirement type="package" version="3.1.2">R</requirement>
75 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> --> 75 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
76 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/> 76 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/>
77 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/> 77 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/>
78 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/> 78 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/>
79 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/> 79 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/>
80 <param name="yrange" type="integer" size="6" value="0" label="y axis range for readmap tool" help="leave at 0 for autoscaling"/>
81 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> 80 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
81 <param name="yrange" type="integer" size="3" value="0" label="y axis range for readmaps. 0 means auto-scaling."/>
82 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?"> 82 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
83 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/> 83 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
84 </param> 84 </param>
85 </inputs> 85 </inputs>
86 <configfiles> 86 <configfiles>
87 <configfile name="plotCode"><![CDATA[ 87 <configfile name="plotCode">
88 ## Setup R error handling to go to stderr 88 ## Setup R error handling to go to stderr
89 options( show.error.messages=F, 89 options( show.error.messages=F,
90 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 90 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
91 library(RColorBrewer) 91 library(RColorBrewer)
92 library(lattice) 92 library(lattice)
93 library(latticeExtra) 93 library(latticeExtra)
94 library(grid) 94 library(grid)
95 library(gridExtra) 95 library(gridExtra)
96 96
97 ## data frames implementation 97 ## data frames implementation
98 98
99 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL) 99 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL)
100 n_samples=length(unique(rm\$sample)) 100 n_samples=length(unique(rm\$sample))
101 genes=unique(levels(rm\$gene)) 101 genes=unique(levels(rm\$gene))
102 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ####### ? 102 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ####### ?
103 n_genes=length(per_gene_readmap) 103 n_genes=length(per_gene_readmap)
104 104
105 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL) 105 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
106 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ###### ? 106 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ###### ?
107 107
108 ## end of data frames implementation 108 ## end of data frames implementation
109 109
110 ## functions 110 ## functions
111 111
112 plot_readmap=function(df, ...) { 112 plot_readmap=function(df, ...) {
113 combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))), 113 combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))),
114 data=df, 114 data=df,
115 type='h', 115 type='h',
116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), 116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
117 xlab=NULL, main=NULL, ylab=NULL, 117 xlab=NULL, main=NULL, ylab=NULL,
118 as.table=T, 118 as.table=T,
119 origin = 0, 119 origin = 0,
120 horizontal=FALSE, 120 horizontal=FALSE,
121 group=polarity, 121 group=polarity,
122 col=c("red","blue"), 122 col=c("red","blue"),
123 par.strip.text = list(cex=0.7), 123 par.strip.text = list(cex=0.7),
124 ...)) 124 ...))
125 } 125 }
126 126
127 plot_size_distribution= function(df, ...) { 127 plot_size_distribution= function(df, ...) {
128 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} 128 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
129 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0, 129 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
130 horizontal=FALSE, 130 horizontal=FALSE,
131 group=polarity, 131 group=polarity,
132 stack=TRUE, 132 stack=TRUE,
133 col=c('red', 'blue'), 133 col=c('red', 'blue'),
134 cex=0.75, 134 cex=0.75,
135 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ), 135 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
136 prepanel=smR.prepanel, 136 prepanel=smR.prepanel,
137 xlab = NULL, 137 xlab = NULL,
138 ylab = NULL, 138 ylab = NULL,
139 main = NULL, 139 main = NULL,
140 as.table=TRUE, 140 as.table=TRUE,
141 newpage = T, 141 newpage = T,
142 par.strip.text = list(cex=0.7), 142 par.strip.text = list(cex=0.7),
143 ...) 143 ...)
144 combineLimits(bc) 144 combineLimits(bc)
145 } 145 }
146 146
147 ## end of functions 147 ## end of functions
148 148
149 ## function parameters 149 ## function parameters
150 150
151 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) 151 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
152 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) 152 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
153 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) ) 153 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) )
154 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) ) 154 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) )
155 155
156 ## end of function parameters 156 ## end of function parameters
157 157
158 ## GRAPHS 158 ## GRAPHS
159 159
160 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else { 160 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else {
161 rows_per_page= 8; page_height_simple = 11.69; page_height_combi=11.69; extrarow=0 } 161 rows_per_page= 8; page_height_simple = 11.69; page_height_combi=11.69; extrarow=0 }
162 ## rows_per_page= 8; page_height_simple = 11.69/7*n_genes; page_height_combi=11.69/9*(n_genes*2); extrarow=0 } 162 ## rows_per_page= 8; page_height_simple = 11.69/7*n_genes; page_height_combi=11.69/9*(n_genes*2); extrarow=0 }
163 ## rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 } 163 ## rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 }
164 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test 164 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test
165 165
166 166 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width)
167 } 167 for (i in seq(1,n_genes,rows_per_page)) {
168 168 start=i
169 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width) 169 end=i+rows_per_page-1
170 for (i in seq(1,n_genes,rows_per_page)) { 170 if (end>n_genes) {end=n_genes}
171 start=i
172 end=i+rows_per_page-1
173 if (end>n_genes) {end=n_genes}
174 if ("${yrange}" != 0) {
175 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) 171 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap))
176 } else { 172 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1,
177 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-"{$yrange}", "{$yrange}"), par.settings=par.settings.readmap)) 173 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"),
178 } 174 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
179 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1, 175 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom")
180 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"), 176 )
181 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90) 177 )
182 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom") 178 do.call(grid.arrange, args.list)
183 ) 179 }
184 ) 180 devname=dev.off()
185 do.call(grid.arrange, args.list) 181
186 } 182
187 devname=dev.off() 183 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width)
188 184 for (i in seq(1,n_genes,rows_per_page)) {
189 185 start=i
190 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width) 186 end=i+rows_per_page-1
191 for (i in seq(1,n_genes,rows_per_page)) { 187 if (end>n_genes) {end=n_genes}
192 start=i 188 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) )
193 end=i+rows_per_page-1 189 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1,
194 if (end>n_genes) {end=n_genes} 190 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"),
195 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) ) 191 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
196 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1, 192 #sub="readsize in nucleotides"
197 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"), 193 )
198 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90) 194 )
199 #sub="readsize in nucleotides" 195 do.call(grid.arrange, args.list)
200 ) 196 }
201 ) 197 devname=dev.off()
202 do.call(grid.arrange, args.list) 198
203 } 199 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width)
204 devname=dev.off() 200 for (i in seq(1,n_genes,rows_per_page/2)) {
205 201 start=i
206 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width) 202 end=i+rows_per_page/2-1
207 for (i in seq(1,n_genes,rows_per_page/2)) { 203 if (end>n_genes) {end=n_genes}
208 start=i 204 if ("${yrange}" != 0) {readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else {
209 end=i+rows_per_page/2-1 205 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-"{$yrange}", "{$yrange}"), par.settings=par.settings.readmap)) }
210 if (end>n_genes) {end=n_genes} 206 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
211 read_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.combination.readmap)) 207 plot.list=rbind(read_plot.list, size_plot.list )
212 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size)) 208 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1,
213 plot.list=rbind(read_plot.list, size_plot.list ) 209 main=textGrob("${title}", gp=gpar(cex=1), just="top"),
214 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1, 210 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90),
215 main=textGrob("${title}", gp=gpar(cex=1), just="top"), 211 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom")
216 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90), 212 )
217 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom") 213 )
218 ) 214 do.call(grid.arrange, args.list)
219 ) 215 }
220 do.call(grid.arrange, args.list) 216 devname=dev.off()
221 } 217
222 devname=dev.off() 218
223 219 </configfile>
224 ]]></configfile>
225 </configfiles> 220 </configfiles>
226 221
227 <outputs> 222 <outputs>
228 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/> 223 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
229 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/> 224 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>