annotate cwpair2.xml @ 12:79a10fe09b66 draft default tip

Uploaded
author greg
date Wed, 16 Dec 2015 19:41:57 -0500
parents 6383cae47688
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
507521bf124a Uploaded
greg
parents:
diff changeset
1 <?xml version="1.0"?>
507521bf124a Uploaded
greg
parents:
diff changeset
2 <tool id="cwpair2" name="CWPair2" version="@WRAPPER_VERSION@.0">
507521bf124a Uploaded
greg
parents:
diff changeset
3 <description>find matched pairs and unmatched orphans</description>
507521bf124a Uploaded
greg
parents:
diff changeset
4 <macros>
507521bf124a Uploaded
greg
parents:
diff changeset
5 <import>cwpair2_macros.xml</import>
507521bf124a Uploaded
greg
parents:
diff changeset
6 </macros>
507521bf124a Uploaded
greg
parents:
diff changeset
7 <expand macro="requirements" />
507521bf124a Uploaded
greg
parents:
diff changeset
8 <command>
507521bf124a Uploaded
greg
parents:
diff changeset
9 <![CDATA[
507521bf124a Uploaded
greg
parents:
diff changeset
10 python $__tool_directory__/cwpair2.py
507521bf124a Uploaded
greg
parents:
diff changeset
11 #for $i in $input:
507521bf124a Uploaded
greg
parents:
diff changeset
12 --input "${i}" "${i.hid}"
507521bf124a Uploaded
greg
parents:
diff changeset
13 #end for
507521bf124a Uploaded
greg
parents:
diff changeset
14 --up_distance $up_distance
507521bf124a Uploaded
greg
parents:
diff changeset
15 --down_distance $down_distance
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
16 --method $method
0
507521bf124a Uploaded
greg
parents:
diff changeset
17 --binsize $binsize
507521bf124a Uploaded
greg
parents:
diff changeset
18 --threshold_format $threshold_format_cond.threshold_format
507521bf124a Uploaded
greg
parents:
diff changeset
19 #if str($threshold_format_cond.threshold_format) == "absolute_threshold":
507521bf124a Uploaded
greg
parents:
diff changeset
20 --absolute_threshold $threshold_format_cond.absolute_threshold
507521bf124a Uploaded
greg
parents:
diff changeset
21 #elif str($threshold_format_cond.threshold_format) == "relative_threshold":
507521bf124a Uploaded
greg
parents:
diff changeset
22 --relative_threshold $threshold_format_cond.relative_threshold
507521bf124a Uploaded
greg
parents:
diff changeset
23 #end if
4
9ed566138ecb Uploaded
greg
parents: 0
diff changeset
24 --output_files $output_files
0
507521bf124a Uploaded
greg
parents:
diff changeset
25 --statistics_output "$statistics_output"
507521bf124a Uploaded
greg
parents:
diff changeset
26 ]]>
507521bf124a Uploaded
greg
parents:
diff changeset
27 </command>
507521bf124a Uploaded
greg
parents:
diff changeset
28 <inputs>
507521bf124a Uploaded
greg
parents:
diff changeset
29 <param name="input" type="data" format="gff" multiple="True" label="Find matched pairs on" />
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
30 <param name="up_distance" type="integer" value="50" min="0" label="Distance upstream from a peak to allow a pair" help="The maximum distance upstream or 5’ to the primary peak"/>
6383cae47688 Uploaded
greg
parents: 10
diff changeset
31 <param name="down_distance" type="integer" value="100" min="0" label="Distance downstream from a peak to allow a pair" help="The maximum distance downstream or 3’ to the primary peak"/>
0
507521bf124a Uploaded
greg
parents:
diff changeset
32 <param name="method" type="select" label="Method of finding a match">
507521bf124a Uploaded
greg
parents:
diff changeset
33 <option value="mode" selected="True">Mode</option>
507521bf124a Uploaded
greg
parents:
diff changeset
34 <option value="closest">Closest</option>
507521bf124a Uploaded
greg
parents:
diff changeset
35 <option value="largest">Largest</option>
507521bf124a Uploaded
greg
parents:
diff changeset
36 <option value="all">All</option>
507521bf124a Uploaded
greg
parents:
diff changeset
37 </param>
507521bf124a Uploaded
greg
parents:
diff changeset
38 <param name="binsize" type="integer" value="1" min="0" label="Width of bins for frequency plots and mode calculation" help="Value 1 implies no bins" />
507521bf124a Uploaded
greg
parents:
diff changeset
39 <conditional name="threshold_format_cond">
507521bf124a Uploaded
greg
parents:
diff changeset
40 <param name="threshold_format" type="select" label="Filter using">
507521bf124a Uploaded
greg
parents:
diff changeset
41 <option value="relative_threshold" selected="True">Relative threshold</option>
507521bf124a Uploaded
greg
parents:
diff changeset
42 <option value="absolute_threshold">Absolute threshold</option>
507521bf124a Uploaded
greg
parents:
diff changeset
43 </param>
507521bf124a Uploaded
greg
parents:
diff changeset
44 <when value="relative_threshold">
507521bf124a Uploaded
greg
parents:
diff changeset
45 <param name="relative_threshold" type="float" value="0.0" min="0.0" label="Percentage of the 95 percentile value to filter below" help="Value 0 results in no filtering" />
507521bf124a Uploaded
greg
parents:
diff changeset
46 </when>
507521bf124a Uploaded
greg
parents:
diff changeset
47 <when value="absolute_threshold">
507521bf124a Uploaded
greg
parents:
diff changeset
48 <param name="absolute_threshold" type="float" value="0.0" min="0.0" label="Absolute value to filter below" />
507521bf124a Uploaded
greg
parents:
diff changeset
49 </when>
507521bf124a Uploaded
greg
parents:
diff changeset
50 </conditional>
4
9ed566138ecb Uploaded
greg
parents: 0
diff changeset
51 <param name="output_files" type="select" label="Restrict output to" help="Statistics will always be generated." >
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
52 <option value="all" selected="True">no restrictions (output everything: C,D,F,O,P,MP)</option>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
53 <option value="matched_pair">matched pairs only (MP)</option>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
54 <option value="matched_pair_orphan">matched pairs and orphans only (O,MP)</option>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
55 <option value="matched_pair_orphan_detail">matched pairs, orphans and details only (D,O,MP)</option>
0
507521bf124a Uploaded
greg
parents:
diff changeset
56 </param>
507521bf124a Uploaded
greg
parents:
diff changeset
57 </inputs>
507521bf124a Uploaded
greg
parents:
diff changeset
58 <outputs>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
59 <data name="statistics_output" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
60 <collection name="H" type="list" label="Statistics Histogram: ${tool.name} on ${on_string}">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
61 <filter>output_files == "all"</filter>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
62 <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="H" ext="pdf" visible="false" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
63 </collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
64 <collection name="D" type="list" label="Data D: ${tool.name} on ${on_string}">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
65 <filter>output_files in ["all", "matched_pair_orphan_detail"]</filter>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
66 <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_D" ext="tabular" visible="false" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
67 </collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
68 <collection name="O" type="list" label="Data O: ${tool.name} on ${on_string}">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
69 <filter>output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]</filter>
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
70 <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_O" ext="tabular" visible="false" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
71 </collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
72 <collection name="MP" type="list" label="Data MP: ${tool.name} on ${on_string}">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
73 <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_MP" ext="gff" visible="false" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
74 </collection>
507521bf124a Uploaded
greg
parents:
diff changeset
75 </outputs>
507521bf124a Uploaded
greg
parents:
diff changeset
76 <tests>
507521bf124a Uploaded
greg
parents:
diff changeset
77 <test>
507521bf124a Uploaded
greg
parents:
diff changeset
78 <param name="input" value="cwpair2_input1.gff" />
507521bf124a Uploaded
greg
parents:
diff changeset
79 <param name="up_distance" value="25" />
507521bf124a Uploaded
greg
parents:
diff changeset
80 <param name="down_distance" value="100" />
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
81 <param name="method" value="all" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
82 <param name="binsize" value="1" />
507521bf124a Uploaded
greg
parents:
diff changeset
83 <param name="threshold_format" value="relative_threshold" />
507521bf124a Uploaded
greg
parents:
diff changeset
84 <param name="relative_threshold" value="0.0" />
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
85 <param name="output_files" value="matched_pair" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
86 <output name="statistics_output" file="statistics1.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
87 <output_collection name="MP" type="list">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
88 <element name="f0u25d100_on_data_1" file="closest_mp_output1.gff" ftype="gff" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
89 <element name="f0u25d100_on_data_1" file="largest_mp_output1.gff" ftype="gff" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
90 <element name="f0u25d100_on_data_1" file="mode_mp_output1.gff" ftype="gff" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
91 </output_collection>
507521bf124a Uploaded
greg
parents:
diff changeset
92 </test>
507521bf124a Uploaded
greg
parents:
diff changeset
93 <test>
507521bf124a Uploaded
greg
parents:
diff changeset
94 <param name="input" value="cwpair2_input1.gff" />
507521bf124a Uploaded
greg
parents:
diff changeset
95 <param name="up_distance" value="50" />
507521bf124a Uploaded
greg
parents:
diff changeset
96 <param name="down_distance" value="100" />
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
97 <param name="method" value="all" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
98 <param name="binsize" value="1" />
507521bf124a Uploaded
greg
parents:
diff changeset
99 <param name="threshold_format" value="relative_threshold" />
507521bf124a Uploaded
greg
parents:
diff changeset
100 <param name="relative_threshold" value="0.0" />
507521bf124a Uploaded
greg
parents:
diff changeset
101 <param name="output_files" value="all" />
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
102 <output name="statistics_output" file="statistics2.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
103 <output_collection name="H" type="list">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
104 <element name="histogram_C_mode_f0u50d100_on_data_1" file="mode_c_output2.pdf" ftype="pdf" compare="sim_size" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
105 <element name="histogram_F_closest_f0u50d100_on_data_1" file="closest_f_output2.pdf" ftype="pdf" compare="sim_size" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
106 <element name="histogram_F_largest_f0u50d100_on_data_1" file="largest_f_output2.pdf" ftype="pdf" compare="sim_size" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
107 <element name="histogram_F_mode_f0u50d100_on_data_1" file="mode_f_output2.pdf" ftype="pdf" compare="sim_size" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
108 <element name="histogram_P_mode_f0u50d100_on_data_1" file="mode_p_output2.pdf" ftype="pdf" compare="sim_size" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
109 </output_collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
110 <output_collection name="D" type="list">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
111 <element name="data_D_closest_f0u50d100_on_data_1" file="closest_d_output2.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
112 <element name="data_D_largest_f0u50d100_on_data_1" file="largest_d_output2.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
113 <element name="data_D_mode_f0u50d100_on_data_1" file="mode_d_output2.tabular" ftype="tabular" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
114 </output_collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
115 <output_collection name="O" type="list">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
116 <element name="data_O_closest_f0u50d100_on_data_1" file="closest_o_output2.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
117 <element name="data_O_largest_f0u50d100_on_data_1" file="largest_o_output2.tabular" ftype="tabular" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
118 <element name="data_O_mode_f0u50d100_on_data_1" file="mode_o_output2.tabular" ftype="tabular" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
119 </output_collection>
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
120 <output_collection name="MP" type="list">
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
121 <element name="data_MP_closest_f0u50d100_on_data_1" file="closest_mp_output2.gff" ftype="gff" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
122 <element name="data_MP_largest_f0u50d100_on_data_1" file="largest_mp_output2.gff" ftype="gff" />
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
123 <element name="data_MP_mode_f0u50d100_on_data_1" file="mode_mp_output2.gff" ftype="gff" />
0
507521bf124a Uploaded
greg
parents:
diff changeset
124 </output_collection>
507521bf124a Uploaded
greg
parents:
diff changeset
125 </test>
507521bf124a Uploaded
greg
parents:
diff changeset
126 </tests>
507521bf124a Uploaded
greg
parents:
diff changeset
127 <help>
507521bf124a Uploaded
greg
parents:
diff changeset
128 **What it does**
507521bf124a Uploaded
greg
parents:
diff changeset
129
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
130 CWPair accepts one or more gff files as input and takes the peak location to be the midpoint between the
6383cae47688 Uploaded
greg
parents: 10
diff changeset
131 exclusion zone start and end coordinate (columns D and E). CWPair starts with the highest peak (primary peak)
6383cae47688 Uploaded
greg
parents: 10
diff changeset
132 in the dataset, and then looks on the opposite strand for another peak located within the distance defined by
6383cae47688 Uploaded
greg
parents: 10
diff changeset
133 a combination of the tool's **Distance upstream from a peak to allow a pair** (the distance upstream or 5’ to
6383cae47688 Uploaded
greg
parents: 10
diff changeset
134 the primary peak) and **Distance downstream from a peak to allow a pair** (the distance downstream or 3’ to the
6383cae47688 Uploaded
greg
parents: 10
diff changeset
135 primary peak) parameters. For example, an "upstream" value of 30 and a "downstream" value of 20 make the tool look 30 bp upstream and
6383cae47688 Uploaded
greg
parents: 10
diff changeset
136 20 bp downstream (inclusive). Consequently, the search space would be 51 bp, since it includes the primary peak
6383cae47688 Uploaded
greg
parents: 10
diff changeset
137 coordinate. The use of a negative number changes the direction of the search limits. So, "upstream" -30 and
6383cae47688 Uploaded
greg
parents: 10
diff changeset
138 "downstream" 20 produces an 11 bp downstream search window (20-30 bp downstream, inclusive).
0
507521bf124a Uploaded
greg
parents:
diff changeset
139
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
140 .. image:: $PATH_TO_IMAGES/cwpair2.png
6383cae47688 Uploaded
greg
parents: 10
diff changeset
141
6383cae47688 Uploaded
greg
parents: 10
diff changeset
142 When encountering multiple candidate peaks within the search window, CWPair uses the resolution method defined by
6383cae47688 Uploaded
greg
parents: 10
diff changeset
143 the tool's **Method of finding a match** parameter as follows:
6383cae47688 Uploaded
greg
parents: 10
diff changeset
144
6383cae47688 Uploaded
greg
parents: 10
diff changeset
145
6383cae47688 Uploaded
greg
parents: 10
diff changeset
146 * **mode** - This is an iterative process in which all peak-pair distances within the search window are determined, and the mode calculated. The pair whose distance apart is closest to the mode is then selected.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
147 * **closest** - Pairs the peak that has the closest absolute distance from the primary peak.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
148 * **largest** - Pairs the peak that has the highest tag count.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
149 * **all** - Runs all three methods, producing separate outputs for each.
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
150
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
151 When considering the candidate peaks for pairing to a primary peak, a tag-count threshold may also be set using
6383cae47688 Uploaded
greg
parents: 10
diff changeset
152 the tool's **Filter using** parameter (relative or absolute threshold). A relative threshold determines the tag counts
6383cae47688 Uploaded
greg
parents: 10
diff changeset
153 at the 95th percentile of peak occupancy (i.e. top 5% in terms of tag counts), then uses a tag count threshold at
6383cae47688 Uploaded
greg
parents: 10
diff changeset
154 the specified percentage of this 95th percentile. So if the peak at the 95th percentile has 200 tags, and "relative
6383cae47688 Uploaded
greg
parents: 10
diff changeset
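155 threshold" 50 is used, then the tool will not consider any peak having fewer than 100 tags. An absolute threshold instead filters out any peak whose value is below the specified absolute value.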
6383cae47688 Uploaded
greg
parents: 10
diff changeset
156
6383cae47688 Uploaded
greg
parents: 10
diff changeset
157 -----
6383cae47688 Uploaded
greg
parents: 10
diff changeset
158
12
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
159 **Options**
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
160
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
161 * **Method of finding a match** - Method used to find a matched pair: mode, closest, largest, or all (runs each of the three methods).
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
162 * **Distance upstream from a peak to allow a pair** - The maximum distance (inclusive) upstream on the opposite strand from the primary peak to locate another peak, resulting in a pair.
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
163 * **Distance downstream from a peak to allow a pair** - The maximum distance (inclusive) downstream on the opposite strand from the primary peak to locate another peak, resulting in a pair.
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
164 * **Percentage of the 95 percentile value to filter below** - Percentage of the 95th percentile value below which to filter when using a relative threshold.
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
165 * **Absolute value to filter below** - Absolute value below which to filter when using an absolute threshold.
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
166 * **Restrict output to** - Restrict output dataset collections to matched pairs only or one of several combinations of collection types.
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
167
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
168 -----
79a10fe09b66 Uploaded
greg
parents: 11
diff changeset
169
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
170 **Output Data Files**
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
171
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
172 * **closest/largest/mode MP** - gff file containing the Matched Pairs, including the peak-pair midpoint coordinate (column D) and the coordinate +1 (column E). The tag count sum is reported in column F, along with the C-W distance in bp in column I.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
173 * **closest/largest/mode O** - tabular file containing the Orphans (all peaks that are not in pairs).
6383cae47688 Uploaded
greg
parents: 10
diff changeset
174 * **closest/largest/mode D** - tabular file containing the Details, which lists + and – strand information separately. The start and end represent the lower and higher coordinates of the exclusion zone from GeneTrack, and “Value” is the tag count sum within the exclusion zone. The peak pair midpoint is calculated along with the distance between the two paired peaks (midpoint-to-midpoint or C-W distance).
10
b52d6705aed0 Uploaded
greg
parents: 7
diff changeset
175
11
6383cae47688 Uploaded
greg
parents: 10
diff changeset
176 **Output Statistics Files**
6383cae47688 Uploaded
greg
parents: 10
diff changeset
177
6383cae47688 Uploaded
greg
parents: 10
diff changeset
178 * **closest/largest/mode C** - pdf file that provides the frequency distribution of peak pair distances.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
179 * **closest/largest/mode P** - pdf file that provides the preview plots graph (the initial iteration of the process for finding the mode).
6383cae47688 Uploaded
greg
parents: 10
diff changeset
180 * **closest/largest/mode F** - pdf file that provides the final plots graph.
6383cae47688 Uploaded
greg
parents: 10
diff changeset
181 * **Statistics Table** - provides the number of peaks in pairs (dividing this by 2 provides the number of peak-pairs).
0
507521bf124a Uploaded
greg
parents:
diff changeset
182
507521bf124a Uploaded
greg
parents:
diff changeset
183 </help>
507521bf124a Uploaded
greg
parents:
diff changeset
184 <expand macro="citations" />
507521bf124a Uploaded
greg
parents:
diff changeset
185 </tool>