annotate genetrack.xml @ 15:ebafcd6c3e0e draft

Uploaded
author greg
date Wed, 16 Dec 2015 12:43:31 -0500
parents cd105fdfb0da
children b40ad4bee6cb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
1 <?xml version="1.0"?>
11
497e3274f70b Uploaded
greg
parents: 6
diff changeset
2 <tool id="genetrack" name="GeneTrack" version="@WRAPPER_VERSION@.0">
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
3 <description>peak predictor</description>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
4 <macros>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
5 <import>genetrack_macros.xml</import>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
6 </macros>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
7 <expand macro="requirements" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
8 <command>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
9 python $__tool_directory__/genetrack.py
0368815ae4d5 Uploaded
greg
parents:
diff changeset
10 --input_format $input_format_cond.input_format
6
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
11 #if str($input_format_cond.input_format) == "scidx":
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
12 #for $i in $input_format_cond.input_scidx:
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
13 --input "${i}" "${i.hid}"
0368815ae4d5 Uploaded
greg
parents:
diff changeset
14 #end for
0368815ae4d5 Uploaded
greg
parents:
diff changeset
15 #elif str($input_format_cond.input_format) == "gff":
0368815ae4d5 Uploaded
greg
parents:
diff changeset
16 #for $i in $input_format_cond.input_gff:
0368815ae4d5 Uploaded
greg
parents:
diff changeset
17 --input "${i}" "${i.hid}"
0368815ae4d5 Uploaded
greg
parents:
diff changeset
18 #end for
0368815ae4d5 Uploaded
greg
parents:
diff changeset
19 #end if
0368815ae4d5 Uploaded
greg
parents:
diff changeset
20 --sigma $sigma
0368815ae4d5 Uploaded
greg
parents:
diff changeset
21 --exclusion $exclusion
0368815ae4d5 Uploaded
greg
parents:
diff changeset
22 --up_width $up_width
0368815ae4d5 Uploaded
greg
parents:
diff changeset
23 --down_width $down_width
0368815ae4d5 Uploaded
greg
parents:
diff changeset
24 --filter $filter
0368815ae4d5 Uploaded
greg
parents:
diff changeset
25 </command>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
26 <inputs>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
27 <conditional name="input_format_cond">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
28 <param name="input_format" type="select" label="Format of files for conversion">
6
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
29 <option value="scidx" selected="True">ScIdx</option>
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
30 <option value="gff">Gff</option>
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
31 </param>
6
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
32 <when value="scidx">
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
33 <param name="input_scidx" type="data" format="scidx" multiple="True" label="Predict peaks on" />
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
34 </when>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
35 <when value="gff">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
36 <param name="input_gff" type="data" format="gff" multiple="True" label="Predict peaks on" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
37 </when>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
38 </conditional>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
39 <param name="sigma" type="integer" value="5" min="1" label="Sigma to use when smoothing reads" help="Higher values increase computation but produce more smoothing." />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
40 <param name="exclusion" type="integer" value="20" min="1" label="Peak exclusion zone" help="Exclusion zone around each peak that prevents others from being called." />
12
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
41 <param name="up_width" type="integer" value="10" min="0" label="Exclusion zone of upstream called peaks" />
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
42 <param name="down_width" type="integer" value="10" min="0" label="Exclusion zone of downstream called peaks" />
15
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
43 <param name="filter" type="integer" value="1" min="0" label="Absolute read filter" help="Removes peaks with lower peak height." />
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
44 </inputs>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
45 <outputs>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
46 <collection name="genetrack_output" type="list" label="Genetrack results on ${on_string}">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
47 <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="output" ext="gff" visible="false" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
48 </collection>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
49 </outputs>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
50 <tests>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
51 <test>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
52 <param name="input_gff" value="genetrack_input2.gff" ftype="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
53 <param name="input_format" value="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
54 <param name="sigma" value="5" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
55 <param name="exclusion" value="20" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
56 <param name="up_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
57 <param name="down_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
58 <param name="filter" value="3" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
59 <output_collection name="genetrack_output" type="list">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
60 <element name="s5e20u10d10F3_on_data_1" file="genetrack_output2.gff" ftype="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
61 </output_collection>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
62 </test>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
63 <test>
6
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
64 <param name="input_scidx" value="genetrack_input3.scidx" ftype="scidx" />
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
65 <param name="input_format" value="scidx" />
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
66 <param name="sigma" value="5" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
67 <param name="exclusion" value="20" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
68 <param name="up_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
69 <param name="down_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
70 <param name="filter" value="3" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
71 <output_collection name="genetrack_output" type="list">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
72 <element name="s5e20u10d10F3_on_data_1" file="genetrack_output3.gff" ftype="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
73 </output_collection>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
74 </test>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
75 <test>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
76 <param name="input_gff" value="genetrack_input_unsorted4.gff" ftype="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
77 <param name="input_format" value="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
78 <param name="sigma" value="5" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
79 <param name="exclusion" value="20" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
80 <param name="up_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
81 <param name="down_width" value="10" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
82 <param name="filter" value="3" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
83 <output_collection name="genetrack_output" type="list">
0368815ae4d5 Uploaded
greg
parents:
diff changeset
84 <element name="s5e20u10d10F3_on_data_1" file="genetrack_output4.gff" ftype="gff" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
85 </output_collection>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
86 </test>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
87 </tests>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
88 <help>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
89 **What it does**
0368815ae4d5 Uploaded
greg
parents:
diff changeset
90
15
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
91 GeneTrack separately identifies peaks on the forward "+" (W) and reverse "-" (C) strands. The way that GeneTrack
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
92 works is to replace each tag with a probabilistic distribution of occurrences for that tag at and around its mapped
12
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
93 genomic coordinate. The distance decay of the probabilistic distribution is set by adjusting the value of the
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
94 tool's **Sigma to use when smoothing reads** parameter. GeneTrack then sums the distribution over all mapped
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
95 tags. This results in a smooth continuous trace that can be globally broadened or tightened by adjusting the
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
96 sigma value. GeneTrack starts with the highest smoothed peak first, treating each strand separately if indicated
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
97 by the data, then sets up an exclusion zone (centered over the peak) defined by the value of the **Peak exclusion
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
98 zone** parameter (see figure). The exclusion zone prevents any secondary peaks from being called on the same strand
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
99 within that exclusion zone. In rare cases, it may be desirable to set different exclusion zones upstream (more 5’)
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
100 versus downstream (more 3’) of the peak.
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
101
15
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
102 .. image:: $PATH_TO_IMAGES/genetrack.png
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
103
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
104 GeneTrack continues through the data in order of peak height, until no other peaks are found, and in principle will
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
105 call a peak at a single isolated tag, if no filter is set using the tool's **Absolute read filter** parameter. A
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
106 filter value of 1 means that it will stop calling peaks when the tag count in the peak hits 1 (so single tag peaks
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
107 will be excluded in this case). GeneTrack outputs **chrom** (chromosome number), **strand** (+/W or -/C strand),
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
108 **start** (lower coordinate of exclusion zone), **end** (higher coordinate of exclusion zone), and **value** (peak
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
109 height). GeneTrack's GFF output reports the start (lower coordinate) and end (higher coordinate) of the exclusion
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
110 zone.
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
111
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
112 In principle, the width of the exclusion zone may be as large as the DNA region occupied by the native protein plus
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
113 a steric exclusion zone between the protein and the exonuclease. On the other hand, the site might be considerably
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
114 smaller if the protein is in a denatured state during exonuclease digestion (since it is pre-treated with SDS).
12
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
115
15
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
116 In general, higher resolution data or smaller binding site size data should use smaller sigma values. Large binding
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
117 site size data such as 147 bp nucleosomal DNA use a larger sigma value like 20 (-s 20). For transcription factors
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
118 mapped by ChIP-exo, sigma may initially be set at 5, and the exclusion zone set at 20 (-s 5 -e 20). Sigma is typically
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
119 varied between ~3 and ~20. Too high a sigma value may merge two independent nearby binding events. This may be
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
120 desirable if closely bound factors are not distinguishable. Too low a sigma value will cause some tags that
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
121 contribute to a binding event to be excluded, because they may not be located sufficiently close to the main peak.
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
122 If alternative (mutually exclusive) binding is expected for two overlapping sites, and these sites are to be
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
123 independently recorded, then an empirically determined smaller exclusion zone width is set. Thus the value of sigma
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
124 is set empirically for each mapped factor, depending upon the resolution and binding site size of the binding event.
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
125
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
126 It might make sense to exclude peaks that have only a single tag, where -F 1 is used, or have their tags located on
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
127 only a single coordinate (called Singletons, where stddev=0 in the output file). However, low coverage datasets might
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
128 be improved by including them, if additional analysis (e.g., motif discovery) validates them. In addition, idealized
ebafcd6c3e0e Uploaded
greg
parents: 12
diff changeset
129 action of the exonuclease in ChIP-exo might place all tags for a peak on a single coordinate.
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
130
6
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
131 -----
fa85ca6c9cf8 Uploaded
greg
parents: 3
diff changeset
132
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
133 **Options**
0368815ae4d5 Uploaded
greg
parents:
diff changeset
134
12
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
135 * **Sigma to use when smoothing reads** - Smooths clusters of tags via a Gaussian distribution.
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
136 * **Peak exclusion zone** - Exclusion zone around each peak, eliminating all other peaks on the same strand that are within a ± bp distance of the peak.
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
137 * **Exclusion zone of upstream called peaks** - Defines the exclusion zone centered over peaks upstream of a peak.
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
138 * **Exclusion zone of downstream called peaks** - Defines the exclusion zone centered over peaks downstream of a peak.
cd105fdfb0da Uploaded
greg
parents: 11
diff changeset
139 * **Absolute read filter** - Restricts output to only peaks whose peak height is larger than this value.
0
0368815ae4d5 Uploaded
greg
parents:
diff changeset
140 </help>
0368815ae4d5 Uploaded
greg
parents:
diff changeset
141 <expand macro="citations" />
0368815ae4d5 Uploaded
greg
parents:
diff changeset
142 </tool>