Mercurial > repos > rnateam > paralyzer
comparison paralyzer.xml @ 0:d4d72f60e8d6 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/paralyzer commit d0cc3dca3aafecf306a0bfb0cd1268b4d5b3e244"
author | rnateam |
---|---|
date | Wed, 23 Oct 2019 19:07:59 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d4d72f60e8d6 |
---|---|
1 <tool id="paralyzer" name="PARalyzer" version="1.5"> | |
2 <description>A method to map interaction sites between RNA-binding proteins | |
3 and their targets</description> | |
4 <requirements> | |
5 <!-- Conda packages: PARalyzer itself, plus faToTwoBit, which the command section runs on a history FASTA --> | |
6 <requirement type="package" version="1.5">paralyzer</requirement> | |
7 <requirement type="package" version="324">ucsc-fatotwobit</requirement> | |
8 </requirements> | |
9 <command detect_errors="aggressive"><![CDATA[ | |
10 | |
11 #if $refGenomeSource.genomeSource == "history": | |
12 faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit | |
13 && | |
14 #end if | |
15 | |
16 ## Run PARalyzer on the generated ini file (path quoted, like the FASTA path above). | |
17 ## The argument right after the program name (2G) is the memory parameter; | |
18 ## it can be overridden in job_conf.xml, e.g. | |
19 ## <env id="_JAVA_OPTIONS">-Xmx12G</env> | |
20 PARalyzer 2G '$input_ini' | |
21 | |
22 ]]> | |
23 </command> | |
24 <configfiles> | |
25 <configfile name="input_ini"> | |
26 ## Genome source: a history FASTA is converted to ownFile.2bit by the command section | |
27 #if $refGenomeSource.genomeSource == "history": | |
28 GENOME_2BIT_FILE=ownFile.2bit | |
29 #else: | |
30 GENOME_2BIT_FILE=$refGenomeSource.builtin.fields.path | |
31 #end if | |
32 ## The collapse switch appends "=COLLAPSED" to the SAM path, or nothing when it is off | |
33 SAM_FILE=$input_sam$collapse | |
34 ## Cluster extension method; the select value HAFFNER_APPROACH is emitted as HAFNER_APPROACH, the spelling used in this tool's help | |
35 #if $methods.choice == "ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL": | |
36 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=$methods.max_num | |
37 #else: | |
38 ${str($methods.choice).replace("HAFFNER_APPROACH", "HAFNER_APPROACH")} | |
39 #end if | |
40 ## CONVERSION is written only for a custom conversion; otherwise the key is omitted | |
41 #if $conversion.selection == "custom": | |
42 CONVERSION=$conversion.character_from>$conversion.character_to | |
43 #end if | |
44 | |
45 ## Required parameters, written only when the full parameter list is selected | |
46 #if $params.settingsType == "custom": | |
47 BANDWIDTH=$params.BANDWIDTH | |
48 MINIMUM_READ_COUNT_PER_GROUP=$params.min_read_group | |
49 MINIMUM_READ_COUNT_PER_CLUSTER=$params.min_read_cluster | |
50 MINIMUM_READ_COUNT_FOR_KDE=$params.min_read_kde | |
51 MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=$params.min_read_cluster_inc | |
52 MINIMUM_CLUSTER_SIZE=$params.min_cluster_size | |
53 MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=$params.min_conv_loc_cluster | |
54 MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=$params.min_conv_cluster | |
55 MINIMUM_READ_LENGTH=$params.min_read_len | |
56 MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=$params.max_num_conv_mis | |
57 #end if | |
58 ## Fixed output names; the outputs section picks these files up via from_work_dir | |
59 OUTPUT_DISTRIBUTIONS_FILE=out.distribution | |
60 OUTPUT_GROUPS_FILE=out.groups | |
61 OUTPUT_CLUSTERS_FILE=out.clusters | |
62 </configfile> | |
63 </configfiles> | |
64 <inputs> | |
65 <param name="input_sam" | |
66 type="data" format="sam" label="Alignment" | |
67 help="The sam file containing alignment of the read data." /> | |
68 | |
69 <param name="collapse" type="boolean" truevalue="=COLLAPSED" | |
70 falsevalue="" checked="True" | |
71 label="Incorporate the read copy number" | |
72 help="If reads were collapsed before alignment and you want | |
73 to incorporate the read copy number, please select Yes; | |
74 otherwise select No." /> <!-- the chosen value is appended to SAM_FILE in the ini file --> | |
75 | |
76 <!-- Genome source: built-in entries come from the lastz_seqs data table; a history FASTA is converted with faToTwoBit in the command section. --> | |
77 <conditional name="refGenomeSource"> | |
78 <param name="genomeSource" type="select" | |
79 label="Will you select a reference genome from your | |
80 history or use a built-in genome?" | |
81 help="The version of genome against which the reads were aligned."> | |
82 <option value="2bit" selected="True"> | |
83 Use a built-in genome</option> | |
84 <option value="history"> | |
85 Use a genome from my current history</option> | |
86 </param> | |
87 <when value="2bit"> | |
88 <param name="builtin" type="select" | |
89 label="Select a reference genome"> | |
90 <options from_data_table="lastz_seqs"> | |
91 <filter type="sort_by" column="1" /> | |
92 <validator type="no_options" | |
93 message="A built-in reference genome is not available | |
94 for the build associated with the selected input file"/> | |
95 </options> | |
96 </param> | |
97 </when> | |
98 <when value="history"> | |
99 <param name="ownFile" type="data" format="fasta" | |
100 label="Select the reference genome" /> | |
101 </when> | |
102 </conditional> | |
103 | |
104 <conditional name="methods"> | |
105 <param name="choice" type="select" | |
106 label="Please select one of the approaches" | |
107 help="Determines how far each cluster is extended; see the Approaches section of the help below."> | |
108 <option value="EXTEND_BY_READ"> | |
109 EXTEND_BY_READ | |
110 </option> | |
111 <option value="HAFFNER_APPROACH"> | |
112 HAFNER_APPROACH | |
113 </option> <!-- the value keeps its original spelling for saved tool state; only the display text is corrected --> | |
114 <option value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL"> | |
115 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL | |
116 </option> | |
117 </param> | |
118 <when value="EXTEND_BY_READ" /> | |
119 <when value="HAFFNER_APPROACH" /> | |
120 <when value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL"> | |
121 <param name="max_num" type="integer" | |
122 value="0" label="Maximum number of additional nucleotides" | |
123 help=" | |
124 The maximum number of nucleotides to extend the cluster beyond | |
125 the positive signal in each direction (default 0). | |
126 "> | |
127 <validator type="in_range" | |
128 message="Minimum allowed value is 0" min="0"/> | |
129 </param> | |
130 </when> | |
131 </conditional> | |
132 | |
133 <conditional name="conversion"> | |
134 <param name="selection" type="select" | |
135 label="Conversion" | |
136 help="Please specify characters"> | |
137 <option value="default">Use defaults: T to C</option> | |
138 <option value="custom">Specify other characters</option> | |
139 </param> | |
140 <when value="default" /> <!-- nothing to configure: no CONVERSION line is written to the ini file --> | |
141 <when value="custom"> <!-- both characters are joined into one CONVERSION line, so each must be exactly one letter --> | |
142 <param name="character_from" type="text" | |
143 size="1" value="T" label="Character from" | |
144 help="Character representing the modified ribonucleotide | |
145 (default 'T')"> | |
146 <validator type="regex" message="Enter exactly one letter">^[A-Za-z]$</validator></param> | |
147 <param name="character_to" type="text" | |
148 size="1" value="C" label="Character to" | |
149 help="Character representing what the modified ribonucleotide | |
150 is read as by rTranscriptase (default 'C')"> | |
151 <validator type="regex" message="Enter exactly one letter">^[A-Za-z]$</validator></param> | |
152 </when> | |
153 </conditional> | |
154 | |
155 <!-- PARalyzer's required parameters; they are written to the ini file only when the full parameter list is chosen --> | |
156 <conditional name="params"> | |
157 <param name="settingsType" type="select" | |
158 label="Required parameters" | |
159 help="You can use the default settings or | |
160 set custom values for any of paralyzer's parameters."> | |
161 <option value="default">Use defaults</option> | |
162 <option value="custom">Full parameter list</option> | |
163 </param> | |
164 <when value="default" /> | |
165 <!-- Full parameter list; each value below is pre-filled with the default stated in its help text. --> | |
166 <when value="custom"> | |
167 <param name="BANDWIDTH" type="integer" | |
168 value="3" label="BANDWIDTH" | |
169 help="Size of bandwidth for KDE calculation (default 3)"> | |
170 <validator type="in_range" | |
171 message="Minimum allowed value is 1" min="1"/> | |
172 </param> | |
173 | |
174 <param name="min_read_group" type="integer" | |
175 value="5" label="MINIMUM_READ_COUNT_PER_GROUP" | |
176 help="Minimum number of reads required to call a group (default 5)"> | |
177 <validator type="in_range" | |
178 message="Minimum allowed value is 1" min="1"/> | |
179 </param> | |
180 | |
181 <param name="min_read_cluster" type="integer" | |
182 value="2" label="MINIMUM_READ_COUNT_PER_CLUSTER" | |
183 help="Minimum number of reads required to call a cluster (default 2)"> | |
184 <validator type="in_range" | |
185 message="Minimum allowed value is 1" min="1"/> | |
186 </param> | |
187 | |
188 <param name="min_read_kde" type="integer" | |
189 value="3" label="MINIMUM_READ_COUNT_FOR_KDE" | |
190 help="Minimum read depth at a location to | |
191 make a KDE estimate (default 3)"> | |
192 <validator type="in_range" | |
193 message="Minimum allowed value is 1" min="1"/> | |
194 </param> | |
195 | |
196 <param name="min_read_cluster_inc" type="integer" | |
197 value="1" label="MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION" | |
198 help="Minimum read depth for a location to be included | |
199 within a cluster (default 1)"> | |
200 <validator type="in_range" | |
201 message="Minimum allowed value is 1" min="1"/> | |
202 </param> | |
203 | |
204 <param name="min_cluster_size" type="integer" | |
205 value="11" label="MINIMUM_CLUSTER_SIZE" | |
206 help="Minimum length required for a cluster | |
207 to be reported (default 11)"> | |
208 <validator type="in_range" | |
209 message="Minimum allowed value is 1" min="1"/> | |
210 </param> | |
211 | |
212 <param name="min_conv_loc_cluster" type="integer" | |
213 value="2" label="MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER" | |
214 help="Minimum number of separate locations to have a | |
215 reported conversion for a cluster to be | |
216 reported (default 2)"> | |
217 <validator type="in_range" | |
218 message="Minimum allowed value is 1" min="1"/> | |
219 </param> | |
220 | |
221 <param name="min_conv_cluster" type="integer" | |
222 value="2" label="MINIMUM_CONVERSION_COUNT_FOR_CLUSTER" | |
223 help="Minimum number of conversion events within a | |
224 region to report a cluster (default 2)"> | |
225 <validator type="in_range" | |
226 message="Minimum allowed value is 1" min="1"/> | |
227 </param> | |
228 | |
229 <param name="min_read_len" type="integer" | |
230 value="20" label="MINIMUM_READ_LENGTH" | |
231 help="Minimum length of mapped read to be included | |
232 in the analysis (default 20)"> | |
233 <validator type="in_range" | |
234 message="Minimum allowed value is 1" min="1"/> | |
235 </param> | |
236 <!-- Unlike the minimum counts above, this is an upper limit, so 0 (no non-conversion mismatches tolerated) is accepted. --> | |
237 <param name="max_num_conv_mis" type="integer" | |
238 value="1" label="MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES" | |
239 help="Maximum number of non-conversion mismatches of | |
240 a mapped read to be included in the analysis (default 1)"> | |
241 <validator type="in_range" | |
242 message="Minimum allowed value is 0" min="0"/> | |
243 </param> | |
244 </when> <!-- custom --> | |
245 </conditional> | |
246 </inputs> | |
247 <outputs> | |
248 <data name="distribution" format="txt" | |
249 from_work_dir="out.distribution" | |
250 label="${tool.name} on ${on_string}: DISTRIBUTIONS"/> | |
251 | |
252 <data name="groups" format="txt" | |
253 from_work_dir="out.groups" | |
254 label="${tool.name} on ${on_string}: GROUPS"/> | |
255 | |
256 <data name="clusters" format="txt" | |
257 from_work_dir="out.clusters" | |
258 label="${tool.name} on ${on_string}: CLUSTERS"/> | |
259 | |
260 <!-- Disabled output, kept for reference (the generated ini file names only the three outputs collected above): | |
261 <data name="PARalyzer_Utilized" format="sam" | |
262 from_work_dir="out_PARalyzer_Utilized.sam" | |
263 label="${tool.name} on ${on_string}: PARalyzer_Utilized.sam"/> | |
264 --> | |
265 </outputs> | |
266 <tests> | |
267 <test> <!-- history FASTA genome, custom conversion and the full parameter list, each set to its default value --> | |
268 <param name="input_sam" value="input.sam" ftype="sam" /> | |
269 <param name="genomeSource" value="history" /> | |
270 <param name="ownFile" value="input.fa" /> | |
271 <param name="choice" value="EXTEND_BY_READ" /> | |
272 <param name="selection" value="custom" /> | |
273 <param name="character_from" value="T" /> | |
274 <param name="character_to" value="C" /> | |
275 <param name="settingsType" value="custom" /> | |
276 <param name="BANDWIDTH" value="3" /> | |
277 <param name="min_read_group" value="5" /> | |
278 <param name="min_read_cluster" value="2" /> | |
279 <param name="min_read_kde" value="3" /> | |
280 <param name="min_read_cluster_inc" value="1" /> | |
281 <param name="min_cluster_size" value="11" /> | |
282 <param name="min_conv_loc_cluster" value="2" /> | |
283 <param name="min_conv_cluster" value="2" /> | |
284 <param name="min_read_len" value="20" /> | |
285 <param name="max_num_conv_mis" value="1" /> | |
286 <output name="distribution" file="out.distribution" | |
287 ftype="txt"/> | |
288 <output name="groups" file="out.groups" | |
289 ftype="txt"/> | |
290 <output name="clusters" file="out.clusters" | |
291 ftype="txt"/> | |
292 <!-- | |
293 <output name="PARalyzer_Utilized" file="out_PARalyzer_Utilized.sam" | |
294 ftype="sam"/> | |
295 --> | |
296 </test> | |
297 </tests> | |
298 <help> | |
299 <![CDATA[ | |
300 .. class:: infomark | |
301 | |
302 **What it does** | |
303 | |
304 `paralyzer`_ is an algorithm to generate a high resolution | |
305 map of interaction sites between RNA-binding proteins and their targets. The | |
306 algorithm utilizes the deep sequencing reads generated by `PAR-CLIP`_ | |
307 (Photoactivatable-Ribonucleoside-Enhanced Crosslinking and | |
308 Immunoprecipitation) protocol. The use of photoactivatable nucleotides in the | |
309 PAR-CLIP protocol results in more efficient crosslinking between the | |
310 RNA-binding protein and its target relative to other CLIP methods; in addition | |
311 a nucleotide substitution occurs at the site of crosslinking, providing for | |
312 single-nucleotide resolution binding information. PARalyzer utilizes this | |
313 nucleotide substitution in a kernel density estimate classifier to generate | |
314 the high resolution set of Protein-RNA interaction sites. | |
315 | |
316 .. _paralyzer: https://ohlerlab.mdc-berlin.de/software/PARalyzer_85/ | |
317 .. _PAR-CLIP: http://www.ncbi.nlm.nih.gov/pubmed/20371350 | |
318 | |
319 .. class:: infomark | |
320 | |
321 **Approaches** | |
322 | |
323 ``EXTEND_BY_READ``: including this line means that the cluster will be extended | |
324 beyond the signal to include a region such that it extends to | |
325 the end of any read that falls within the cluster and contained | |
326 a conversion, or until the minimum read depth | |
327 (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
328 | |
329 ``HAFNER_APPROACH``: identifies the location with the largest number of conversion | |
330 events and extends the cluster up to | |
331 (parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL) nt | |
332 in each direction from that point, or until the minimum | |
333 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
334 | |
335 ``ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL``: the maximum number of nucleotides to | |
336 extend beyond the positive signal in each direction (default 0); | |
337 the cluster is defined as the region where the conversion KDE is above | |
338 the background KDE and then extended up to #integer#, or until the minimum | |
339 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
340 | |
341 .. class:: infomark | |
342 | |
343 **Outputs** | |
344 | |
345 DISTRIBUTIONS: contains the signal KDE, background KDE, read count & conversion for all locations within each group | |
346 * The data will be in blocks of four lines for each group | |
347 * groups on the reverse strand do not need to be reversed; the values always equal nucleotides from GroupStart to GroupEnd, regardless of Strand | |
348 * First Column = Chromosome = chromosome on which the group resides | |
349 * Second Column = Strand = orientation in which the group resides | |
350 * Third Column = GroupStart = beginning coordinate on the chromosome of the group | |
351 * Fourth Column = GroupEnd = ending coordinate on the chromosome of the group | |
352 * Fifth Column = GroupID = unique ID for the group | |
353 * Sixth Column = Information = reports if the current line contains the Signal, Background, Conversion Percent, or ReadCount | |
354 * All nucleotides that do not have any possibility of having a conversion event are given a value of -1 | |
355 * All Subsequent Columns: the values for each nucleotide from GroupStart until GroupEnd | |
356 | |
357 | |
358 GROUPS: a comma separated file containing the information about the resulting groups | |
359 * Chromosome = chromosome on which the group resides | |
360 * Strand = orientation in which the group resides | |
361 * GroupStart = beginning coordinate on the chromosome of the group | |
362 * GroupEnd = ending coordinate on the chromosome of the group | |
363 * GroupID = unique ID for the group | |
364 * ReadCount = number of reads within the group | |
365 | |
366 CLUSTERS: a comma separated file containing the information about the resulting clusters | |
367 * Chromosome = chromosome on which the cluster resides | |
368 * Strand = orientation in which the cluster resides | |
369 * ClusterStart = beginning coordinate on the chromosome of the cluster | |
370 * ClusterEnd = ending coordinate on the chromosome of the cluster | |
371 * ClusterID = unique ID for the cluster | |
372 * ClusterSequence = sequence of the cluster | |
373 * ReadCount = number of reads that overlap the cluster by at least 1 nucleotide | |
374 * ModeLocation = coordinate of the location with the highest signal / (signal + background) value | |
375 * ModeScore = score of the highest signal / (signal + background) value | |
376 * ConversionLocationCount = number of unique locations where at least 1 conversion occurred | |
377 * ConversionEventCount = total number of conversions that occurred within the cluster | |
378 * NonConversionEventCount = total number of possible conversion events that did not occur | |
379 | |
380 ]]></help> | |
381 <citations> | |
382 <citation type="doi">10.1186/gb-2011-12-8-r79</citation> | |
383 </citations> | |
384 </tool> |