comparison size_histogram.xml @ 0:de6a6afc5a79 draft default tip

Uploaded
author drosofff
date Tue, 24 Jun 2014 12:16:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:de6a6afc5a79
1 <tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.0">
2 <description>from sRbowtie alignment</description>
3 <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
4 <parallelism method="basic"></parallelism>
5 <command interpreter="python">
6 size_histogram.py
7 #if $refGenomeSource.genomeSource == "history":
8 --reference_fasta ## flag only; the FASTA path on the next line ends up as sys.argv[2]
9 $refGenomeSource.ownFile ## reference FASTA taken from the history; NOTE(review): needs an "ownFile" param in the "history" branch of refGenomeSource - confirm it is declared
10 #else:
11 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
12 --reference_bowtie_index ## value below: last field of the first bowtie_indexes row whose first field equals the dbkey of the first alignment in the series
13 $reference
14 #end if
15 --rcode ## followed by the plotCode configfile declared in the configfiles section
16 $plotCode
17 --output_size_distribution ## followed by the tabular output dataset, which the plotCode R script also reads
18 $size_distribution_dataframe
19 --minquery
20 $minquery
21 --maxquery
22 $maxquery
23 --input ## one value per entry of the "series" repeat; the ext, label and normalization_factor lists below iterate the same repeat in the same order
24 #for $i in $refGenomeSource.series
25 $i.input
26 #end for
27 --ext
28 #for $i in $refGenomeSource.series
29 $i.input.ext
30 #end for
31 --label
32 #for $i in $refGenomeSource.series
33 "$i.input.name"
34 #end for
35 --normalization_factor
36 #for $i in $refGenomeSource.series
37 $i.norm
38 #end for
39 #if $gff:
40 --gff
41 $gff
42 #end if
43 #if $global.value == 'yes':
44 --global_size
45 #end if
46 #if $collapsestrands.value == 'yes':
47 --collapse
48 #end if
49
50 </command>
51 <inputs>
52 <conditional name="refGenomeSource"> <!-- both branches declare the same "series" repeat (input + norm) that the command template iterates over -->
53 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
54 <option value="indexed">Use a built-in index</option>
55 <option value="history">Use one from the history</option>
56 </param>
57 <when value="indexed">
58 <repeat name="series" title="Add alignment files">
59 <param name="input" type="data" label="Select multiple alignments to parse">
60 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
61 </param>
62 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
63 </repeat>
64 </when>
65 <when value="history"><param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference"/> <!-- ownFile is the reference FASTA that the command template passes after the reference_fasta option -->
66 <repeat name="series" title="Add alignment files">
67 <param name="input" type="data" label="Select multiple alignments to parse"/>
68 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> <!-- float, same as in the "indexed" branch -->
69 </repeat>
70 </when>
71 </conditional>
72 <param name="gff" type="data" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
73 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
74 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
75 <option value="no">for each item</option>
76 <option value="yes">global</option>
77 </param>
78 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
79 <option value="no">Do not collapse</option>
80 <option value="yes">Collapse + and - reads</option>
81 </param>
82 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
83 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
84 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
85 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>
86 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
87 <param name="rows_per_page" type="integer" size="9" value="8" label="How many items to display per page?"> <!-- integer: the R script divides by this value and uses it as the number of layout rows -->
88 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
89 </param>
90 </inputs>
91 <configfiles>
92 <configfile name="plotCode">
93 ## Setup R error handling to go to stderr
94 options( show.error.messages=F,
95 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
96 library(RColorBrewer)
97 library(lattice)
98 library(latticeExtra)
99 library(grid)
100 library(gridExtra)
101 ##cheetahtemplate data frame implementation
102 ## Read the tabular output dataset; the columns sample, gene, size, count and polarity are used below
103 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
104
105 n_samples=length(unique(size\$sample))
106 genes=unique(levels(size\$gene))
107 n_genes=length(genes)
108 ## Text size shrinks as more rows are requested per page; the prepanel makes every y axis symmetric around zero
109 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-3, strip = .75), fontsize = list(text=96/${rows_per_page}, points=8))
110 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
111 ## Stacked barchart of count per read size, grouped by polarity, with one panel per sample and gene combination
112 plot_size_distribution= function(df, ...) {
113 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
114 horizontal=FALSE,
115 group=polarity,
116 stack=TRUE,
117 col=c('red', 'blue'),
118 strip = strip.custom(par.strip.text = list(cex = 0.5)),
119 cex=0.75,
120 scales=list(y=list(tick.number=4, rot=90, relation="free"), cex=0.75),
121 xlab = "${xlabel}", ## x axis text comes from the xlabel tool parameter, like ylabel and title
122 ylab = "${ylabel}",
123 main="${title}" ,
124 as.table=TRUE, newpage = T, ...)
125 combineLimits(update(useOuterStrips(bc), layout=c(n_samples,${rows_per_page})), margin.x=F, margin.y=1)
126 }
127
128 per_gene_size=lapply(genes, function(x) subset(size, gene==x))
129
130 global = "no"
131 #if $global.value == 'yes':
132 global = "yes"
133 #end if
134 ## global "no": per-gene panels via plot_size_distribution; otherwise a single chart with panels per sample only
135 if (global=="no") {
136 options(warn=-1)
137 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677*n_samples/4)
138 plot_size_distribution(size, par.settings=par.settings.size, prepanel=smR.prepanel)
139 } else {
140 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
141 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
142 horizontal=FALSE,
143 group=polarity,
144 stack=TRUE,
145 col=c('red', 'blue'),
146 cex=0.75,
147 par.settings=list(fontsize = list(text=8, points=8)),
148 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=0.75),
149 xlab = "${xlabel}", ## x axis text comes from the xlabel tool parameter, like ylabel and title
150 ylab = "${ylabel}",
151 main="${title}" , as.table=TRUE, newpage = T,
152 aspect=0.5)
153 #layout=c(n_samples, ${rows_per_page}))
154 bc
155 }
156 devname=dev.off()
157
158 </configfile>
159 </configfiles>
160
161 <outputs> <!-- the tabular dataset is passed to size_histogram.py and read back by the plotCode R script, which writes the PDF -->
162 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>
163 <data format="pdf" name="size_PDF" label="Size distribution"/>
164 </outputs>
165 <help>
166
167 **What it does**
168
169 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,
170 where by default for each "chromosome" a histogram of read sizes is drawn.
171 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).
172
173
174 .. class:: warningmark
175
176 **TIP** The input data can be produced using the sRbowtie tool.
177
178 ----
179
180 **Example**
181
182 Query sequence::
183 For a SAM file as the following:
184
185 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
186
187 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
188
189 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
190
191 produce a plot like this:
192
193 ----
194
195 .. image:: static/images/size_histogram.png
196 :height: 800
197 :width: 500
198
199 </help>
200 </tool>