comparison size_histogram.xml @ 0:63ff807752d7 draft

Imported from capsule None
author drosofff
date Mon, 03 Nov 2014 10:30:29 -0500
parents
children 6c72cf9a00df
comparison
equal deleted inserted replaced
-1:000000000000 0:63ff807752d7
1 <tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.5">
2 <description>from sRbowtie alignment</description>
3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.1.18">samtools</requirement>
6 <requirement type="package" version="0.7.7">pysam</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement>
8 <requirement type="package" version="3.0.3">R</requirement>
9 </requirements>
10 <parallelism method="basic"></parallelism>
11 <command interpreter="python">
12 size_histogram.py ## wrapper script, run with the python interpreter declared on the command element
13 #if $refGenomeSource.genomeSource == "history":
14 --reference_fasta ## sys.argv[2]
15 $refGenomeSource.ownFile ## index source
16 #else:
17 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
18 --reference_bowtie_index ## reference is the last field of the first bowtie_indexes row whose first field equals the dbkey of the first input of the series
19 $reference
20 #end if
21 --rcode ## path of the R script rendered from the plotCode configfile
22 $plotCode
23 --output_size_distribution ## tabular output dataset; the plotCode R script reads the same path
24 $size_distribution_dataframe
25 --minquery
26 $minquery
27 --maxquery
28 $maxquery
29 --input ## one dataset path per entry of the series repeat
30 #for $i in $refGenomeSource.series
31 $i.input
32 #end for
33 --ext ## datatype extension of each input, in the same order as the input list
34 #for $i in $refGenomeSource.series
35 $i.input.ext
36 #end for
37 --label ## history name of each input, double-quoted; NOTE(review): names containing shell metacharacters are not escaped here, confirm this is acceptable
38 #for $i in $refGenomeSource.series
39 "$i.input.name"
40 #end for
41 --normalization_factor ## one factor per input, in the same order as the input list
42 #for $i in $refGenomeSource.series
43 $i.norm
44 #end for
45 #if $gff:
46 --gff
47 $gff
48 #end if
49 #if $global.value == 'yes':
50 --global_size
51 #end if
52 #if $collapsestrands.value == 'yes':
53 --collapse
54 #end if
55
56 </command>
57 <inputs>
58 <conditional name="refGenomeSource"> <!-- each branch defines a "series" repeat of (input, norm); the history branch additionally asks for a fasta reference -->
59 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
60 <option value="indexed">Use a built-in index</option>
61 <option value="history">Use one from the history</option>
62 </param>
63 <when value="indexed">
64 <repeat name="series" title="Add alignment files">
65 <param name="input" type="data" label="Select multiple alignments to parse">
66 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
67 </param>
68 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
69 </repeat>
70 </when>
71 <when value="history">
72 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
73 <repeat name="series" title="Add alignment files">
74 <param name="input" type="data" label="Select multiple alignments to parse"/>
75 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
76 </repeat>
77 </when>
78 </conditional>
79 <param name="gff" type="data" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
80 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
81 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
82 <option value="no">for each item</option>
83 <option value="yes">global</option>
84 </param>
85 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
86 <option value="no">Do not collapse</option>
87 <option value="yes">Collapse + and - reads</option>
88 </param>
89 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
90 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
91 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
92 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>
93 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
94 <param name="rows_per_page" type="integer" size="9" value="8" label="How many items to display per page?"> <!-- integer, so the in_range validator checks a number; the value becomes the row count of the lattice layout in the plotCode script -->
95 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
96 </param>
97 </inputs>
98 <configfiles>
99 <configfile name="plotCode">
100 ## Setup R error handling to go to stderr
101 options( show.error.messages=F,
102 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
103 library(RColorBrewer)
104 library(lattice)
105 library(latticeExtra)
106 library(grid)
107 library(gridExtra)
108 ## Read the table whose path is also given to size_histogram.py as the output_size_distribution argument
109 ##cheetahtemplate data frame implementation
110 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
111 n_samples = length(unique (size\$sample))
112 n_genes = length (unique (levels(size\$gene)))
113 ## Shared lattice settings: panel padding and alternating strip background colours
114 par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1),
115 strip.background = list(col = c("lightblue", "lightgreen"))
116 )
117
118 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} # use if one want y axis in the middle of the plot
119 ## plot_size_distribution: stacked barchart of count by read size, conditioned on sample and gene; extra arguments are forwarded to barchart
120 plot_size_distribution= function(df, ...) {
121 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
122 horizontal=FALSE,
123 group=polarity,
124 stack=TRUE,
125 col=c('red', 'blue'),
126 cex=0.75,
127 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=T), x=list(cex=.6 ) ),
128 xlab = "${xlabel}",
129 ylab = "${ylabel}",
130 main="${title}" ,
131 par.strip.text = list(cex=0.75),
132 as.table=TRUE,
133 newpage = T,
134 ...)
135 ## Outer strips, then a page layout of n_samples columns by rows_per_page rows
136 combineLimits(update(useOuterStrips(bc,
137 strip.left = strip.custom(par.strip.text = list(cex=0.5))
138 ),
139 layout=c(n_samples,${rows_per_page})),
140 margin.x=F, margin.y=1)
141 }
142
143 # per_gene_size=lapply(genes, function(x) subset(size, gene==x)) # no object in this script
144 ## The Cheetah block below overrides the R default when the tool's global select is set to yes
145 global = "no"
146 #if $global.value == 'yes':
147 global = "yes"
148 #end if
149
150 if (global=="no") {
151 ## Per-item mode: page width scales with the number of samples
152 options(warn=-1)
153 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677*n_samples/4)
154 plot_size_distribution(size, par.settings=par.settings.size) # removed , prepanel=smR.prepanel
155
156 } else {
157 ## Global mode: one panel per sample on a fixed-width page, with the x axis label taken from the xlabel parameter
158 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
159 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
160 horizontal=FALSE,
161 group=polarity,
162 stack=TRUE,
163 col=c('red', 'blue'),
164 # par.settings=list(fontsize = list(text=8, points=8)),
165 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
166 xlab = "${xlabel}",
167 ylab = "${ylabel}",
168 main="${title}" , as.table=TRUE, newpage = T,
169 aspect=0.5,
170 strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue")
171 )
172 bc
173 }
174 devname=dev.off()
175
176 </configfile>
177 </configfiles>
178
179 <outputs>
180 <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/> <!-- path is passed to size_histogram.py as the output_size_distribution argument and read by the plotCode R script -->
181 <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/> <!-- opened as the pdf() device by the plotCode R script -->
182 </outputs>
183 <help>
184
185 **What it does**
186
187 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,
188 where by default for each "chromosome" a histogram of read sizes is drawn.
189 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).
190
191
192 .. class:: warningmark
193
194 **TIP** The input data can be produced using the sRbowtie tool.
195
196 ----
197
198 **Example**
199
200 Query sequence::
201 For a SAM file as the following:
202
203 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
204
205 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
206
207 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
208
209 produce a plot like this:
210
211 ----
212
213 .. image:: static/images/size_histogram.png
214 :height: 800
215 :width: 500
216
217 </help>
218 <tests><test> <!-- wrapped in a tests element: Galaxy only reads test cases nested there, a test directly under tool is never run -->
219 <param name="genomeSource" value="history" />
220 <param name="ownFile" value ="transposons.fasta" ftype="fasta" />
221 <param name="input" value="sample1.srbowtie_out, sample2.srbowtie_out, sample3.srbowtie_out" ftype="tabular" /> <!-- NOTE(review): input and norm belong to the "series" repeat; confirm the comma-separated form is expanded per repeat entry by the target Galaxy release -->
222 <param name="norm" value="1,1,1" />
223 <param name="global" value="no" />
224 <param name="collapsestrands" value="no" />
225 <param name="minquery" value="18"/>
226 <param name="maxquery" value="30"/>
227 <param name="title" value="Size distribution"/>
228 <param name="xlabel" value="Size in nucleotides"/>
229 <param name="ylabel" value="Number of reads"/>
230 <param name="rows_per_page" value="10"/>
231 <output name="size_distribution_dataframe" ftype="tabular" value="Size_distribution_dataframe.tab" />
232 <output name="size_PDF" ftype="pdf" value="Size_distribution.pdf" />
233 </test></tests>
234 </tool>
235