annotate computeGCBias.xml @ 6:5742b322f956 draft default tip

Uploaded
author greg
date Tue, 10 Dec 2013 10:09:39 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
5742b322f956 Uploaded
greg
parents:
diff changeset
1 <tool id="deeptools_computeGCBias" name="computeGCBias" version="1.0.1">
5742b322f956 Uploaded
greg
parents:
diff changeset
2 <description>to see whether your samples should be normalized for GC bias</description>
5742b322f956 Uploaded
greg
parents:
diff changeset
3 <expand macro="requirements" />
5742b322f956 Uploaded
greg
parents:
diff changeset
4 <stdio>
5742b322f956 Uploaded
greg
parents:
diff changeset
5 <exit_code range="0" level="warning" description="Warning" />
5742b322f956 Uploaded
greg
parents:
diff changeset
6 </stdio>
5742b322f956 Uploaded
greg
parents:
diff changeset
7 <macros>
5742b322f956 Uploaded
greg
parents:
diff changeset
8 <import>deepTools_macros.xml</import>
5742b322f956 Uploaded
greg
parents:
diff changeset
9 </macros>
5742b322f956 Uploaded
greg
parents:
diff changeset
10 <command>
5742b322f956 Uploaded
greg
parents:
diff changeset
11 ln -s $bamInput local_bamInput.bam;
5742b322f956 Uploaded
greg
parents:
diff changeset
12 ln -s $bamInput.metadata.bam_index local_bamInput.bam.bai;
5742b322f956 Uploaded
greg
parents:
diff changeset
13
5742b322f956 Uploaded
greg
parents:
diff changeset
14 computeGCBias
5742b322f956 Uploaded
greg
parents:
diff changeset
15
5742b322f956 Uploaded
greg
parents:
diff changeset
16 @THREADS@
5742b322f956 Uploaded
greg
parents:
diff changeset
17
5742b322f956 Uploaded
greg
parents:
diff changeset
18 --bamfile 'local_bamInput.bam'
5742b322f956 Uploaded
greg
parents:
diff changeset
19 --GCbiasFrequenciesFile $outFileName
5742b322f956 Uploaded
greg
parents:
diff changeset
20 --fragmentLength $fragmentLength
5742b322f956 Uploaded
greg
parents:
diff changeset
21
5742b322f956 Uploaded
greg
parents:
diff changeset
22 @reference_genome_source@
5742b322f956 Uploaded
greg
parents:
diff changeset
23
5742b322f956 Uploaded
greg
parents:
diff changeset
24 #if $effectiveGenomeSize.effectiveGenomeSize_opt == "specific":
5742b322f956 Uploaded
greg
parents:
diff changeset
25 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize
5742b322f956 Uploaded
greg
parents:
diff changeset
26 #else:
5742b322f956 Uploaded
greg
parents:
diff changeset
27 --effectiveGenomeSize $effectiveGenomeSize.effectiveGenomeSize_opt
5742b322f956 Uploaded
greg
parents:
diff changeset
28 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
29
5742b322f956 Uploaded
greg
parents:
diff changeset
30 #if $advancedOpt.showAdvancedOpt == "yes":
5742b322f956 Uploaded
greg
parents:
diff changeset
31 #if str($advancedOpt.region.value) != '':
5742b322f956 Uploaded
greg
parents:
diff changeset
32 --region '$advancedOpt.region'
5742b322f956 Uploaded
greg
parents:
diff changeset
33 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
34
5742b322f956 Uploaded
greg
parents:
diff changeset
35 --sampleSize '$advancedOpt.sampleSize'
5742b322f956 Uploaded
greg
parents:
diff changeset
36 --regionSize '$advancedOpt.regionSize'
5742b322f956 Uploaded
greg
parents:
diff changeset
37
5742b322f956 Uploaded
greg
parents:
diff changeset
38 #if $advancedOpt.filterOut:
5742b322f956 Uploaded
greg
parents:
diff changeset
39 --filterOut $advancedOpt.filterOut
5742b322f956 Uploaded
greg
parents:
diff changeset
40 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
41
5742b322f956 Uploaded
greg
parents:
diff changeset
42 #if $advancedOpt.extraSampling:
5742b322f956 Uploaded
greg
parents:
diff changeset
43 --extraSampling $advancedOpt.extraSampling
5742b322f956 Uploaded
greg
parents:
diff changeset
44 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
45 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
46
5742b322f956 Uploaded
greg
parents:
diff changeset
47 #if $saveBiasPlot:
5742b322f956 Uploaded
greg
parents:
diff changeset
48 --biasPlot $biasPlot
5742b322f956 Uploaded
greg
parents:
diff changeset
49 #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
50
5742b322f956 Uploaded
greg
parents:
diff changeset
51 ## #if $output.showOutputSettings == "yes"
5742b322f956 Uploaded
greg
parents:
diff changeset
52 ## #if $output.saveBiasPlot:
5742b322f956 Uploaded
greg
parents:
diff changeset
53 ## --biasPlot biasPlot.png ;
5742b322f956 Uploaded
greg
parents:
diff changeset
54 ## mv biasPlot.png $biasPlot
5742b322f956 Uploaded
greg
parents:
diff changeset
55 ## #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
56 ## #end if
5742b322f956 Uploaded
greg
parents:
diff changeset
57
5742b322f956 Uploaded
greg
parents:
diff changeset
58 </command>
5742b322f956 Uploaded
greg
parents:
diff changeset
59 <inputs>
5742b322f956 Uploaded
greg
parents:
diff changeset
60
5742b322f956 Uploaded
greg
parents:
diff changeset
61 <param name="bamInput" format="bam" type="data" label="Input BAM file"
5742b322f956 Uploaded
greg
parents:
diff changeset
62 help="The BAM file must be sorted."/>
5742b322f956 Uploaded
greg
parents:
diff changeset
63
5742b322f956 Uploaded
greg
parents:
diff changeset
64 <expand macro="reference_genome_source" />
5742b322f956 Uploaded
greg
parents:
diff changeset
65 <expand macro="effectiveGenomeSize" />
5742b322f956 Uploaded
greg
parents:
diff changeset
66
5742b322f956 Uploaded
greg
parents:
diff changeset
67 <param name="fragmentLength" type="integer" value="300" min="1"
5742b322f956 Uploaded
greg
parents:
diff changeset
68 label="Fragment length used for the sequencing"
5742b322f956 Uploaded
greg
parents:
diff changeset
69 help ="If paired-end reads are used, the fragment length is computed from the BAM file."/>
5742b322f956 Uploaded
greg
parents:
diff changeset
70
5742b322f956 Uploaded
greg
parents:
diff changeset
71 <conditional name="advancedOpt">
5742b322f956 Uploaded
greg
parents:
diff changeset
72 <param name="showAdvancedOpt" type="select" label="Show advanced options" >
5742b322f956 Uploaded
greg
parents:
diff changeset
73 <option value="no" selected="true">no</option>
5742b322f956 Uploaded
greg
parents:
diff changeset
74 <option value="yes">yes</option>
5742b322f956 Uploaded
greg
parents:
diff changeset
75 </param>
5742b322f956 Uploaded
greg
parents:
diff changeset
76 <when value="no" />
5742b322f956 Uploaded
greg
parents:
diff changeset
77 <when value="yes">
5742b322f956 Uploaded
greg
parents:
diff changeset
78 <param name="region" type="text" value=""
5742b322f956 Uploaded
greg
parents:
diff changeset
79 label="Region of the genome to limit the operation to"
5742b322f956 Uploaded
greg
parents:
diff changeset
80 help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />
5742b322f956 Uploaded
greg
parents:
diff changeset
81
5742b322f956 Uploaded
greg
parents:
diff changeset
82 <param name="sampleSize" type="integer" value="50000000" min="1"
5742b322f956 Uploaded
greg
parents:
diff changeset
83 label="Number of sampling points to be considered" />
5742b322f956 Uploaded
greg
parents:
diff changeset
84
5742b322f956 Uploaded
greg
parents:
diff changeset
85 <param name="regionSize" type="integer" value="300" min="1"
5742b322f956 Uploaded
greg
parents:
diff changeset
86 label="Region size"
5742b322f956 Uploaded
greg
parents:
diff changeset
87 help ="To plot the reads per GC over a region, the size of the region is required (see below for more details of the method). By default, the bin size is set to 300 bp, which is close to the standard fragment size of many sequencing applications. However, if the depth of sequencing is low, a larger bin size will be required, otherwise many bins will not overlap with any read."/>
5742b322f956 Uploaded
greg
parents:
diff changeset
88
5742b322f956 Uploaded
greg
parents:
diff changeset
89 <param name="filterOut" type="data" format="bed" optional="true"
5742b322f956 Uploaded
greg
parents:
diff changeset
90 label="BED file containing genomic regions to be excluded from the estimation of the correction"
5742b322f956 Uploaded
greg
parents:
diff changeset
91 help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
5742b322f956 Uploaded
greg
parents:
diff changeset
92 <param name="extraSampling" type="data" format="bed" optional="true"
5742b322f956 Uploaded
greg
parents:
diff changeset
93 label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
5742b322f956 Uploaded
greg
parents:
diff changeset
94 help="" />
5742b322f956 Uploaded
greg
parents:
diff changeset
95 </when>
5742b322f956 Uploaded
greg
parents:
diff changeset
96 </conditional>
5742b322f956 Uploaded
greg
parents:
diff changeset
97
5742b322f956 Uploaded
greg
parents:
diff changeset
98 <param name="saveBiasPlot" type="boolean" truevalue="--biasPlot" falsevalue="" checked="True" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
5742b322f956 Uploaded
greg
parents:
diff changeset
99 <!--
5742b322f956 Uploaded
greg
parents:
diff changeset
100 <conditional name="output" >
5742b322f956 Uploaded
greg
parents:
diff changeset
101 <param name="showOutputSettings" type="select" label="Show additional output options" >
5742b322f956 Uploaded
greg
parents:
diff changeset
102 <option value="no" selected="true">no</option>
5742b322f956 Uploaded
greg
parents:
diff changeset
103 <option value="yes">yes</option>
5742b322f956 Uploaded
greg
parents:
diff changeset
104 </param>
5742b322f956 Uploaded
greg
parents:
diff changeset
105 <when value="no" />
5742b322f956 Uploaded
greg
parents:
diff changeset
106 <when value="yes">
5742b322f956 Uploaded
greg
parents:
diff changeset
107 <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
5742b322f956 Uploaded
greg
parents:
diff changeset
108 </when>
5742b322f956 Uploaded
greg
parents:
diff changeset
109 </conditional>
5742b322f956 Uploaded
greg
parents:
diff changeset
110 -->
5742b322f956 Uploaded
greg
parents:
diff changeset
111 </inputs>
5742b322f956 Uploaded
greg
parents:
diff changeset
112 <outputs>
5742b322f956 Uploaded
greg
parents:
diff changeset
113 <data format="tabular" name="outFileName" />
5742b322f956 Uploaded
greg
parents:
diff changeset
114 <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
5742b322f956 Uploaded
greg
parents:
diff changeset
115 <filter>saveBiasPlot is True</filter>
5742b322f956 Uploaded
greg
parents:
diff changeset
116 <!--<filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>-->
5742b322f956 Uploaded
greg
parents:
diff changeset
117 </data>
5742b322f956 Uploaded
greg
parents:
diff changeset
118 </outputs>
5742b322f956 Uploaded
greg
parents:
diff changeset
119 <help>
5742b322f956 Uploaded
greg
parents:
diff changeset
120
5742b322f956 Uploaded
greg
parents:
diff changeset
121 **What it does**
5742b322f956 Uploaded
greg
parents:
diff changeset
122
5742b322f956 Uploaded
greg
parents:
diff changeset
123 This tool computes the GC bias using the method proposed by Benjamini and Speed (2012), Nucleic Acids Res. (see below for more explanations).
5742b322f956 Uploaded
greg
parents:
diff changeset
124 The output is used to plot the bias and can also be used later on to correct the bias with the tool correctGCBias.
5742b322f956 Uploaded
greg
parents:
diff changeset
125 There are two plots produced by the tool: a boxplot showing the absolute read numbers per genomic-GC bin and an x-y plot
5742b322f956 Uploaded
greg
parents:
diff changeset
126 depicting the ratio of observed/expected reads per genomic GC content bin.
5742b322f956 Uploaded
greg
parents:
diff changeset
127
5742b322f956 Uploaded
greg
parents:
diff changeset
128 -----
5742b322f956 Uploaded
greg
parents:
diff changeset
129
5742b322f956 Uploaded
greg
parents:
diff changeset
130 **Summary of the method used**
5742b322f956 Uploaded
greg
parents:
diff changeset
131
5742b322f956 Uploaded
greg
parents:
diff changeset
132 In order to estimate how many reads with what kind of GC content one should have sequenced, we first need to determine how many regions the specific
5742b322f956 Uploaded
greg
parents:
diff changeset
133 reference genome contains for each amount of GC content, i.e. how many regions in the genome have 50% GC (or 10% GC or 90% GC or...).
5742b322f956 Uploaded
greg
parents:
diff changeset
134 We then sample a large number of equally sized genome bins and count how many times we see a bin with 50% GC (or 10% GC or 90% or...). These EXPECTED values are independent of any
5742b322f956 Uploaded
greg
parents:
diff changeset
135 sequencing as they only depend on the respective reference genome (i.e. they will most likely vary between mouse and fruit fly due to their genomes' different GC contents).
5742b322f956 Uploaded
greg
parents:
diff changeset
136 The OBSERVED values are based on the reads from the sequenced sample. Instead of noting how many genomic regions there are per GC content, we now count the reads per GC content.
5742b322f956 Uploaded
greg
parents:
diff changeset
137 In an ideal sample without GC bias, the ratio of OBSERVED/EXPECTED values should be close to 1 regardless of the GC content. Due to PCR (over)amplifications, the majority of ChIP samples
5742b322f956 Uploaded
greg
parents:
diff changeset
138 usually show a significant bias towards reads with high GC content (>50%).
5742b322f956 Uploaded
greg
parents:
diff changeset
139
5742b322f956 Uploaded
greg
parents:
diff changeset
140 .. image:: $PATH_TO_IMAGES/QC_GCplots_input.png
5742b322f956 Uploaded
greg
parents:
diff changeset
141
5742b322f956 Uploaded
greg
parents:
diff changeset
142
5742b322f956 Uploaded
greg
parents:
diff changeset
143 **Output files**:
5742b322f956 Uploaded
greg
parents:
diff changeset
144
5742b322f956 Uploaded
greg
parents:
diff changeset
145 - Diagnostic plot
5742b322f956 Uploaded
greg
parents:
diff changeset
146
5742b322f956 Uploaded
greg
parents:
diff changeset
147 - box plot of absolute read numbers per genomic GC bin
5742b322f956 Uploaded
greg
parents:
diff changeset
148 - x-y plot of observed/expected read ratios per genomic GC content bin
5742b322f956 Uploaded
greg
parents:
diff changeset
149
5742b322f956 Uploaded
greg
parents:
diff changeset
150 - Data matrix
5742b322f956 Uploaded
greg
parents:
diff changeset
151
5742b322f956 Uploaded
greg
parents:
diff changeset
152 - to be used for GC correction with correctGCBias
5742b322f956 Uploaded
greg
parents:
diff changeset
153
5742b322f956 Uploaded
greg
parents:
diff changeset
154
5742b322f956 Uploaded
greg
parents:
diff changeset
155 -----
5742b322f956 Uploaded
greg
parents:
diff changeset
156
5742b322f956 Uploaded
greg
parents:
diff changeset
157 .. class:: infomark
5742b322f956 Uploaded
greg
parents:
diff changeset
158
5742b322f956 Uploaded
greg
parents:
diff changeset
159 @REFERENCES@
5742b322f956 Uploaded
greg
parents:
diff changeset
160
5742b322f956 Uploaded
greg
parents:
diff changeset
161 </help>
5742b322f956 Uploaded
greg
parents:
diff changeset
162 </tool>