comparison paralyzer.xml @ 0:d4d72f60e8d6 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/paralyzer commit d0cc3dca3aafecf306a0bfb0cd1268b4d5b3e244"
author rnateam
date Wed, 23 Oct 2019 19:07:59 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d4d72f60e8d6
1 <tool id="paralyzer" name="PARalyzer" version="1.5">
2 <description>A method to map interaction sites between RNA-binding proteins
3 and their targets</description>
4 <requirements>
5 <!-- Conda packages: PARalyzer itself, and UCSC faToTwoBit, which the command section runs to turn a history FASTA into a 2bit file -->
6 <requirement type="package" version="1.5">paralyzer</requirement>
7 <requirement type="package" version="324">ucsc-fatotwobit</requirement>
8 </requirements>
9 <command detect_errors="aggressive"><![CDATA[
10 ## A genome taken from the history is FASTA, so convert it to 2bit first.
11 #if $refGenomeSource.genomeSource == "history":
12 faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit
13 &&
14 #end if
15
16 ## Execute PARalyzer on the generated input_ini parameter file.
17 ## The argument right after the program name (2G) is the memory parameter;
18 ## it can be overridden in job_conf.xml, e.g.
19 ## <env id="_JAVA_OPTIONS">-Xmx12G</env>
20 PARalyzer 2G '$input_ini'
21
22 ]]>
23 </command>
24 <configfiles>
25 <configfile name="input_ini">
26 ## genome source: ownFile.2bit (built by the command section) or the path of the built-in genome
27 #if $refGenomeSource.genomeSource == "history":
28 GENOME_2BIT_FILE=ownFile.2bit
29 #else
30 GENOME_2BIT_FILE=$refGenomeSource.builtin.fields.path
31 #end if
32 ## alignment: the collapse boolean appends "=COLLAPSED" to the SAM path, or nothing
33 SAM_FILE=$input_sam$collapse
34 ## approach: the select value HAFFNER_APPROACH is emitted as HAFNER_APPROACH (spelling used in the tool help)
35 #if $methods.choice == "ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL":
36 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=$methods.max_num
37 #else:
38 ${str($methods.choice).replace("HAFFNER_APPROACH", "HAFNER_APPROACH")}
39 #end if
40
41 #if $conversion.selection == "custom":
42 CONVERSION=$conversion.character_from>$conversion.character_to
43 #end if
44
45 ## required parameters (only written when the full parameter list is selected)
46 #if $params.settingsType == "custom":
47 BANDWIDTH=$params.BANDWIDTH
48 MINIMUM_READ_COUNT_PER_GROUP=$params.min_read_group
49 MINIMUM_READ_COUNT_PER_CLUSTER=$params.min_read_cluster
50 MINIMUM_READ_COUNT_FOR_KDE=$params.min_read_kde
51 MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=$params.min_read_cluster_inc
52 MINIMUM_CLUSTER_SIZE=$params.min_cluster_size
53 MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=$params.min_conv_loc_cluster
54 MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=$params.min_conv_cluster
55 MINIMUM_READ_LENGTH=$params.min_read_len
56 MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=$params.max_num_conv_mis
57 #end if
58
59 OUTPUT_DISTRIBUTIONS_FILE=out.distribution
60 OUTPUT_GROUPS_FILE=out.groups
61 OUTPUT_CLUSTERS_FILE=out.clusters
62 </configfile>
63 </configfiles>
64 <inputs>
65 <param name="input_sam" type="data"
66 format="sam" label="Alignment"
67 help="The sam file containing alignment of the read data."/>
68 <!-- The boolean below is appended directly after the SAM path in input_ini: "=COLLAPSED" when checked, nothing otherwise -->
69 <param name="collapse" type="boolean" truevalue="=COLLAPSED"
70 falsevalue="" checked="True"
71 label="Incorporate the read copy number"
72 help="If reads were collapse before alignment and you want
73 to incorporate the read copy number, please select Yes,
74 otherwise select No" />
75
76 <!-- Genome source: "2bit" picks a built-in entry from the lastz_seqs data table; "history" takes a FASTA dataset that the command section converts with faToTwoBit -->
77 <conditional name="refGenomeSource">
78 <param name="genomeSource" type="select"
79 label="Will you select a reference genome from your
80 history or use a built-in genome?"
81 help="The version of genome against which the reads were aligned.">
82 <option value="2bit" selected="True">
83 Use a built-in genome</option>
84 <option value="history">
85 Use a genome from my current history</option>
86 </param>
87 <when value="2bit">
88 <param name="builtin" type="select"
89 label="Select a reference genome">
90 <options from_data_table="lastz_seqs">
91 <filter type="sort_by" column="1" />
92 <validator type="no_options"
93 message="A built-in reference genome is not available
94 for the build associated with the selected input file"/>
95 </options>
96 </param>
97 </when>
98 <when value="history">
99 <param name="ownFile" type="data" format="fasta"
100 label="Select the reference genome" />
101 </when>
102 </conditional>
103
104 <conditional name="methods"> <!-- Cluster extension approach, consumed by the input_ini configfile; only ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL takes a number (max_num). NOTE(review): the option value is spelled HAFFNER_APPROACH while the help section spells it HAFNER_APPROACH; confirm which spelling PARalyzer expects. -->
105 <param name="choice" type="select"
106 label="Please select one of the approaches"
107 help="">
108 <option value="EXTEND_BY_READ">
109 EXTEND_BY_READ
110 </option>
111 <option value="HAFFNER_APPROACH">
112 HAFFNER_APPROACH
113 </option>
114 <option value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
115 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL
116 </option>
117 </param>
118 <when value="EXTEND_BY_READ" />
119 <when value="HAFFNER_APPROACH" />
120 <when value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
121 <param name="max_num" type="integer"
122 value="0" label="The maximum number"
123 help="
124 The maximum number of reads to extend beyond the positive
125 signal in each direction (default 0) the cluster is defined
126 ">
127 <validator type="in_range"
128 message="Minimum allowed value is 0" min="0"/>
129 </param>
130 </when>
131 </conditional>
132
133 <conditional name="conversion"> <!-- Nucleotide conversion: "default" writes no CONVERSION line to input_ini; "custom" writes CONVERSION=character_from>character_to -->
134 <param name="selection" type="select"
135 label="Conversion"
136 help="Please specify characters">
137 <option value="default">Use defaults: T to C</option>
138 <option value="custom">Specify other characters</option>
139 </param>
140 <when value="default" />
141 <when value="custom">
142 <param name="character_from" type="text"
143 size="1" value="T" label="Character from"
144 help="Character representing the modified ribonucleotide
145 (default 'T')">
146 </param>
147 <param name="character_to" type="text"
148 size="1" value="C" label="Character to"
149 help="Character representing what the modified ribonucleotide
150 is read as by rTranscriptase (default 'C')">
151 </param>
152 </when>
153 </conditional>
154
155 <!-- core parameters: "default" leaves them out of input_ini, "custom" writes all ten values -->
156 <conditional name="params">
157 <param name="settingsType" type="select"
158 label="Required parameters"
159 help="You can use the default settings or
160 set custom values for any of paralyzer's parameters.">
161 <option value="default">Use defaults</option>
162 <option value="custom">Full parameter list</option>
163 </param>
164 <when value="default" />
165 <!-- Full/advanced params. All are lower-bounded at 1 except the mismatch maximum, where 0 (no non-conversion mismatch tolerated) is a valid setting. -->
166 <when value="custom">
167 <param name="BANDWIDTH" type="integer"
168 value="3" label="BANDWIDTH"
169 help="Size of bandwidth for KDE calculation (default 3)">
170 <validator type="in_range"
171 message="Minimum allowed value is 1" min="1"/>
172 </param>
173
174 <param name="min_read_group" type="integer"
175 value="5" label="MINIMUM_READ_COUNT_PER_GROUP"
176 help="Minimum number of reads required to call a group (default 5)">
177 <validator type="in_range"
178 message="Minimum allowed value is 1" min="1"/>
179 </param>
180
181 <param name="min_read_cluster" type="integer"
182 value="2" label="MINIMUM_READ_COUNT_PER_CLUSTER"
183 help="Minimum number of reads required to call a cluster (default 2)">
184 <validator type="in_range"
185 message="Minimum allowed value is 1" min="1"/>
186 </param>
187
188 <param name="min_read_kde" type="integer"
189 value="3" label="MINIMUM_READ_COUNT_FOR_KDE"
190 help="Minimum read depth at a location to
191 make a KDE estimate (default 3)">
192 <validator type="in_range"
193 message="Minimum allowed value is 1" min="1"/>
194 </param>
195
196 <param name="min_read_cluster_inc" type="integer"
197 value="1" label="MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION"
198 help="Minimum read depth for a location to be included
199 within a cluster (default 1)">
200 <validator type="in_range"
201 message="Minimum allowed value is 1" min="1"/>
202 </param>
203
204 <param name="min_cluster_size" type="integer"
205 value="11" label="MINIMUM_CLUSTER_SIZE"
206 help="Minimum length required for a cluster
207 to be reported (default 11)">
208 <validator type="in_range"
209 message="Minimum allowed value is 1" min="1"/>
210 </param>
211
212 <param name="min_conv_loc_cluster" type="integer"
213 value="2" label="MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER"
214 help="Minimum number of separate locations to have a
215 reported conversion for a cluster to be
216 reported (default 2)">
217 <validator type="in_range"
218 message="Minimum allowed value is 1" min="1"/>
219 </param>
220
221 <param name="min_conv_cluster" type="integer"
222 value="2" label="MINIMUM_CONVERSION_COUNT_FOR_CLUSTER"
223 help="Minimum number of conversion events within a
224 region to report a cluster (default 2)">
225 <validator type="in_range"
226 message="Minimum allowed value is 1" min="1"/>
227 </param>
228
229 <param name="min_read_len" type="integer"
230 value="20" label="MINIMUM_READ_LENGTH"
231 help="Minimum length of mapped read to be included
232 in the analysis (default 20)">
233 <validator type="in_range"
234 message="Minimum allowed value is 1" min="1"/>
235 </param>
236
237 <param name="max_num_conv_mis" type="integer"
238 value="1" label="MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES"
239 help="Maximum number of non-conversion mismatches of
240 a mapped read to be included in the analysis (default 1)">
241 <validator type="in_range"
242 message="Minimum allowed value is 0" min="0"/>
243 </param>
244 </when> <!-- full -->
245 </conditional>
246 </inputs>
247 <outputs>
248 <data name="distribution" format="txt"
249 from_work_dir="out.distribution"
250 label="${tool.name} on ${on_string}: DISTRIBUTIONS"/>
251
252 <data name="groups" format="txt"
253 from_work_dir="out.groups"
254 label="${tool.name} on ${on_string}: GROUPS"/>
255
256 <data name="clusters" format="txt"
257 from_work_dir="out.clusters"
258 label="${tool.name} on ${on_string}: CLUSTERS"/>
259 <!-- Disabled SAM output kept for reference; input_ini only names the three output files collected above -->
260 <!--
261 <data name="PARalyzer_Utilized" format="sam"
262 from_work_dir="out_PARalyzer_Utilized.sam"
263 label="${tool.name} on ${on_string}: PARalyzer_Utilized.sam"/>
264 -->
265 </outputs>
266 <tests>
267 <test> <!-- history genome, custom conversion and full parameter list, every value set to its declared default -->
268 <param name="input_sam" value="input.sam" ftype="sam" />
269 <param name="genomeSource" value="history" />
270 <param name="ownFile" value="input.fa" />
271 <param name="choice" value="EXTEND_BY_READ" />
272 <param name="selection" value="custom" />
273 <param name="character_from" value="T" />
274 <param name="character_to" value="C" />
275 <param name="settingsType" value="custom" />
276 <param name="BANDWIDTH" value="3" />
277 <param name="min_read_group" value="5" />
278 <param name="min_read_cluster" value="2" />
279 <param name="min_read_kde" value="3" />
280 <param name="min_read_cluster_inc" value="1" />
281 <param name="min_cluster_size" value="11" />
282 <param name="min_conv_loc_cluster" value="2" />
283 <param name="min_conv_cluster" value="2" />
284 <param name="min_read_len" value="20" />
285 <param name="max_num_conv_mis" value="1" />
286 <output name="distribution" file="out.distribution"
287 ftype="txt"/>
288 <output name="groups" file="out.groups"
289 ftype="txt"/>
290 <output name="clusters" file="out.clusters"
291 ftype="txt"/>
292 <!--
293 <output name="PARalyzer_Utilized" file="out_PARalyzer_Utilized.sam"
294 ftype="sam"/>
295 -->
296 </test>
297 </tests>
298 <help>
299 <![CDATA[
300 .. class:: infomark
301
302 **What it does**
303
304 `paralyzer`_ is an algorithm to generate a high resolution
305 map of interaction sites between RNA-binding proteins and their targets. The
306 algorithm utilizes the deep sequencing reads generated by `PAR-CLIP`_
307 (Photoactivatable-Ribonucleoside-Enhanced Crosslinking and
308 Immunoprecipitation) protocol. The use of photoactivatable nucleotides in the
309 PAR-CLIP protocol results in more efficient crosslinking between the
310 RNA-binding protein and its target relative to other CLIP methods; in addition
311 a nucleotide substitution occurs at the site of crosslinking, providing for
312 single-nucleotide resolution binding information. PARalyzer utilizes this
313 nucleotide substitution in a kernel density estimate classifier to generate
314 the high resolution set of Protein-RNA interaction sites.
315
316 .. _paralyzer: https://ohlerlab.mdc-berlin.de/software/PARalyzer_85/
317 .. _PAR-CLIP: http://www.ncbi.nlm.nih.gov/pubmed/20371350
318
319 .. class:: infomark
320
321 **Approaches**
322
323 ``EXTEND_BY_READ``: including this line means that the cluster will be extended
324 beyond the signal to include a region such that it extends to
325 the end of any read that falls within the cluster and contained
326 a conversion, or until the minimum read depth
327 (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
328
329 ``HAFNER_APPROACH``: identifies the location with the largest number of conversion
330 events and extends the cluster up to
331 (parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL) nt
332 in each direction from that point, or until the minimum
333 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
334
335 ``ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL``: the maximum number of reads to
336 extend beyond the positive signal in each direction (default 0)
337 the cluster is defined as the region where the conversion KDE is above
338 the background KDE and then extended up to #integer#, or until the minimum
339 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
340
341 .. class:: infomark
342
343 **Outputs**
344
345 DISTRIBUTIONS: contains the signal KDE, background KDE, read count & conversion for all locations within each group
346 * The data will be in blocks of four lines for each group
347 * groups on the reverse strand do not need to be reversed; the values always equal nucleotides from GroupStart to GroupEnd, regardless of Strand
348 * First Column = Chromosome = chromosome on which the group resides
349 * Second Column = Strand = orientation in which the group resides
350 * Third Column = GroupStart = beginning coordinate on the chromosome of the group
351 * Fourth Column = GroupEnd = ending coordinate on the chromosome of the group
352 * Fifth Column = GroupID = unique ID for the group
353 * Sixth Column = Information = reports if the current line contains the Signal, Background, Conversion Percent, or ReadCount
354 * All nucleotides that do not have any possibility of having a conversion event are given a value of -1
355 * All Subsequent Columns: the values for each nucleotide from GroupStart until GroupEnd
356
357
358 GROUPS: a comma separated file containing the information about the resulting groups
359 * Chromosome = chromosome on which the group resides
360 * Strand = orientation in which the group resides
361 * GroupStart = beginning coordinate on the chromosome of the group
362 * GroupEnd = ending coordinate on the chromosome of the group
363 * GroupID = unique ID for the group
364 * ReadCount = number of reads within the group
365
366 CLUSTERS: a comma separated file containing the information about the resulting clusters
367 * Chromosome = chromosome on which the cluster resides
368 * Strand = orientation in which the cluster resides
369 * ClusterStart = beginning coordinate on the chromosome of the cluster
370 * ClusterEnd = ending coordinate on the chromosome of the cluster
371 * ClusterID = unique ID for the cluster
372 * ClusterSequence = sequence of the cluster
373 * ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
374 * ModeLocation = coordinate of the location with the highest signal / (signal + background) value
375 * ModeScore = score of the highest signal / (signal + background) value
376 * ConversionLocationCount = number of unique locations where at least 1 conversion occurred
377 * ConversionEventCount = total number of conversions that occurred within the cluster
378 * NonConversionEventCount = total number of possible conversion events that did not occur
379
380 ]]></help>
381 <citations> <!-- DOI of the publication cited for this tool (presumably the PARalyzer paper; verify) -->
382 <citation type="doi">10.1186/gb-2011-12-8-r79</citation>
383 </citations>
384 </tool>