comparison methylkit.xml @ 0:baede5a87a90 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
author rnateam
date Wed, 21 Dec 2016 17:26:25 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:baede5a87a90
1 <tool id="methylkit" name="methylKit" version="0.99.2">
2 <description>DNA methylation analysis and annotation</description>
3 <macros>
4 <macro name="differential_methylation">
5 <param name="overdispersion" type="select"
6 label="overdispersion"
7 help="if set to 'none' (default), no overdispersion
8 correction will be attempted. 'MN' applies a scaling
9 parameter to variance estimated by the model.
10 If set to 'shrinkMN' (experimental parameter),
11 scaling parameter will be shrunk towards a common value">
12 <option value="none" selected="True">
13 none
14 </option>
15 <option value="MN">
16 MN
17 </option>
18 <option value="shrinkMN">
19 shrinkMN
20 </option>
21 </param>
22 <param name="adjust" type="select"
23 label="adjust"
24 help="different methods to correct the p-values
25 for multiple testing (default: SLIM).">
26 <option value="SLIM" selected="True">
27 SLIM
28 </option>
29 <option value="holm">
30 holm
31 </option>
32 <option value="hochberg">
33 hochberg
34 </option>
35 <option value="hommel">
36 hommel
37 </option>
38 <option value="bonferroni">
39 bonferroni
40 </option>
41 <option value="BH">
42 BH
43 </option>
44 <option value="BY">
45 BY
46 </option>
47 <option value="fdr">
48 fdr
49 </option>
50 <option value="none">
51 none
52 </option>
53 <option value="qvalue">
54 qvalue
55 </option>
56 </param>
57 <param name="effect" type="select"
58 label="effect"
59 help="method to calculate the mean methylation difference
60 between groups using read coverage as weights (default: wmean).
61 When set to 'mean', the generic mean is applied and when
62 set to 'predicted', predicted means from the logistic
63 regression model are used for calculating the effect.">
64 <option value="wmean" selected="True">
65 wmean
66 </option>
67 <option value="mean">
68 mean
69 </option>
70 <option value="predicted">
71 predicted
72 </option>
73 </param>
74 <param name="test" type="select"
75 label="test"
76 help="the statistical test used to determine
77 the methylation differences (default: Chisq-test).
78 The F-test can be chosen
79 if overdispersion control is applied.">
80 <option value="Chisq" selected="True">
81 Chisq
82 </option>
83 <option value="F">
84 F
85 </option>
86 </param>
87 <param name="qvalue_cutoff" type="float"
88 value="0.01" label="qvalue.cutoff"
89 help="cutoff for qvalue of differential methylation statistic (default:0.01)">
90 <validator type="in_range"
91 message="Minimum 0 and maximum 1" min="0" max="1"/>
92 </param>
93 <param name="meth_cutoff" type="float"
94 value="25" label="meth.cutoff"
95 help="cutoff for absolute value of methylation percentage change between test and control (default:25)">
96 <validator type="in_range"
97 message="Minimum 0 and maximum 100" min="0" max="100"/>
98 </param>
99 <param name="type" type="select"
100 label="type"
101 help="For retrieving
102 hyper-methylated regions/bases type='hyper',
103 for hypo-methylated type='hypo' (default:'all')">
104 <option value="all" selected="True">
105 all
106 </option>
107 <option value="hyper">
108 hyper
109 </option>
110 <option value="hypo">
111 hypo
112 </option>
113 </param>
114 </macro>
115 <macro name="clustering">
116 <param name="dist" type="select"
117 label="dist"
118 help="the distance measure to be used.
119 (default: correlation)">
120 <option value="correlation" selected="True">
121 correlation
122 </option>
123 <option value="euclidean">
124 euclidean
125 </option>
126 <option value="maximum">
127 maximum
128 </option>
129 <option value="manhattan">
130 manhattan
131 </option>
132 <option value="canberra">
133 canberra
134 </option>
135 <option value="binary">
136 binary
137 </option>
138 <option value="minkowski">
139 minkowski
140 </option>
141 </param>
142 <param name="method" type="select"
143 label="method"
144 help="the agglomeration method to be used.
145 (default: ward)">
146 <option value="ward" selected="True">
147 ward
148 </option>
149 <option value="single">
150 single
151 </option>
152 <option value="complete">
153 complete
154 </option>
155 <option value="average">
156 average
157 </option>
158 <option value="mcquitty">
159 mcquitty
160 </option>
161 <option value="median">
162 median
163 </option>
164 <option value="centroid">
165 centroid
166 </option>
167 </param>
168 </macro>
169 </macros>
170 <requirements>
171 <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement>
172 </requirements>
173 <!-- Each pattern is searched in both stdout and stderr (source="both"); every rule is declared with level="fatal". --> <stdio>
174 <regex match="Execution halted"
175 source="both"
176 level="fatal"
177 description="Execution halted." />
178 <regex match="Input-Error 01"
179 source="both"
180 level="fatal"
181 description="Error in your input parameters: Make sure you only apply factors to selected samples." />
182 <regex match="Error in"
183 source="both"
184 level="fatal"
185 description="An undefined error occurred, please check your input carefully and contact your administrator." />
186 </stdio>
187 <!-- Runs the R script rendered from the script_file configfile; the path is single-quoted so the shell passes it to Rscript as one argument. --> <command>
188 <![CDATA[
189 Rscript '$script_file'
190 ]]>
191 </command>
192 <configfiles>
193 <configfile name="script_file">
194 library("methylKit")
195
196 test_files = list()
197 control_files = list()
198
199 test_ids = list()
200 control_ids = list()
201
202 #for $i, $s in enumerate( $test_series )
203 test_ids[${i}+1] = paste("test ", ${i}+1, sep="")
204 test_files[${i}+1] = "${s.input.file_name}"
205 #end for
206
207 #for $i, $s in enumerate( $control_series )
208 control_ids[${i}+1] = paste("control ", ${i}+1, sep="")
209 control_files[${i}+1] = "${s.input.file_name}"
210 #end for
211
212 input_files = append(test_files, control_files)
213 sample_ids = append(test_ids, control_ids)
214 treatment_tag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids)))
215
216 myobj=methRead(input_files, sample.id=sample_ids, assembly="${assembly}",
217 pipeline="${pipeline}", treatment=treatment_tag)
218
219 pdf('output_statistics.pdf')
220 for (obj in myobj){
221 getMethylationStats(obj,plot=TRUE,both.strands=FALSE)
222 getCoverageStats(obj,plot=TRUE,both.strands=FALSE)
223 }
224 devname = dev.off()
225
226 ## unite the samples into one object (methidh) that every analysis below works on
227 methidh=unite(myobj)
228
229 pdf("output_correlation.pdf")
230 getCorrelation(object = methidh, plot=TRUE, method = "${correlation}")
231 devname = dev.off()
232
233 #if $input_type.choice in ["all", "differential_methylation"]:
234 ## Differential methylation. slim and weighted.mean are the redundant
235 ## counterparts of adjust and effect, so both are switched off below
236 ## to avoid a possible conflict between the two ways of setting them.
237 myDiff = calculateDiffMeth(methidh, overdispersion="${input_type.overdispersion}",
238 adjust="${input_type.adjust}", effect="${input_type.effect}", test="${input_type.test}",
239 slim=FALSE, weighted.mean=FALSE)
240
241 bedgraph(myDiff, file.name="output_myDiff.bedgraph", col.name="meth.diff",
242 unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="")
243
244 MethPerChr = diffMethPerChr(myDiff, plot=FALSE,
245 qvalue.cutoff=${input_type.qvalue_cutoff},
246 meth.cutoff=${input_type.meth_cutoff})
247 write.table(MethPerChr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv")
248
249 MethylDiff = getMethylDiff(myDiff, difference=${input_type.meth_cutoff},
250 qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}")
251 bedgraph(MethylDiff, file.name="output_MethylDiff.bedgraph", col.name="meth.diff",
252 unmeth=FALSE,log.transform=FALSE,negative=FALSE,add.on="")
253 #end if
254
255 #if $input_type.choice in ["all", "clustering"]:
256 pdf( "output_clustering.pdf" )
257 methClust = clusterSamples(methidh, dist="${input_type.dist}", method="${input_type.method}")
258 devname = dev.off()
259
260 pdf( "output_PCA.pdf" )
261 PCASamples(methidh)
262 devname = dev.off()
263 #end if
264
265 #if $input_type.choice in ["all", "segmentation"]:
266 ## methSeg works on methylRaw or methylDiff objects at region resolution, so the
267 ## per-base data is tiled first; only myobj[[1]] (the first test sample) is segmented raw
268 tileRaw = tileMethylCounts(myobj[[1]])
269 tileBase = tileMethylCounts(methidh)
270 tileDiff = calculateDiffMeth(tileBase)
271
272 ## methSeg generates GRanges objects
273 segRaw = methSeg(tileRaw, diagnostic.plot = FALSE)
274 segDiff = methSeg(tileDiff, diagnostic.plot = FALSE)
275
276 ## which are then exported as BED
277 methSeg2bed(segments = segRaw, filename = "output_seg_raw.bed")
278 methSeg2bed(segments = segDiff, filename = "output_seg_diff.bed")
279 #end if
280 </configfile>
281 </configfiles>
282 <inputs>
283 <repeat name="test_series" title="Test samples" min="1">
284 <param name="input" type="data" format="tabular" label="Add a file"
285 help="Such an input file may be obtained from the AMP pipeline for aligning RRBS reads.">
286 <validator type="unspecified_build" />
287 </param>
288 </repeat>
289 <repeat name="control_series" title="Control samples" min="1">
290 <param name="input" type="data" format="tabular" label="Add a file"
291 help="Such an input file may be obtained from the AMP pipeline for aligning RRBS reads." >
292 <validator type="unspecified_build" />
293 </param>
294 </repeat>
295 <param name="assembly" type="text"
296 value="hg18" label="assembly"
297 help="A string that defines the genome assembly such as
298 hg18, mm9 (default: hg18).">
299 </param>
300 <param name="correlation" type="select"
301 label="correlation"
302 help="correlation method (default: pearson)">
303 <option value="pearson" selected="True">
304 pearson
305 </option>
306 <option value="kendall">
307 kendall
308 </option>
309 <option value="spearman">
310 spearman
311 </option>
312 </param>
313 <param name="pipeline" type="select"
314 label="pipeline"
315 help="name of the alignment pipeline (default: amp)">
316 <option value="amp" selected="True">
317 amp
318 </option>
319 <option value="bismark">
320 bismark
321 </option>
322 <option value="bismarkCoverage">
323 bismarkCoverage
324 </option>
325 <option value="bismarkCytosineReport">
326 bismarkCytosineReport
327 </option>
328 </param>
329 <conditional name="input_type">
330 <param name="choice" type="select"
331 label="analysis to carry out:"
332 help="The analysis you wish to carry out.">
333 <option value="all" selected="True">
334 All provided analysis
335 </option>
336 <option value="differential_methylation">
337 Differential methylation
338 </option>
339 <option value="clustering">
340 Clustering
341 </option>
342 <option value="segmentation">
343 Segmentation
344 </option>
345 </param>
346 <when value="all">
347 <expand macro="differential_methylation" />
348 <expand macro="clustering" />
349 </when>
350 <when value="differential_methylation">
351 <expand macro="differential_methylation" />
352 </when>
353 <when value="clustering">
354 <expand macro="clustering" />
355 </when>
356 <when value="segmentation" />
357 </conditional>
358 </inputs>
359 <outputs>
360 <data name="output_statistics" format="pdf"
361 from_work_dir="output_statistics.pdf"
362 label="${tool.name} on ${on_string}: CpG statistics">
363 </data>
364
365 <data name="output_correlation" format="pdf"
366 from_work_dir="output_correlation.pdf"
367 label="${tool.name} on ${on_string}: correlation between samples">
368 </data>
369
370 <data name="output_myDiff" format="bedgraph"
371 from_work_dir="output_myDiff.bedgraph"
372 label="${tool.name} on ${on_string}: differential methylation">
373 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
374 </data>
375
376 <data name="output_MethylDiff" format="bedgraph"
377 from_work_dir="output_MethylDiff.bedgraph"
378 label="${tool.name} on ${on_string}: differential methylation - subset">
379 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
380 </data>
381
382 <data name="output_MethPerChr" format="tabular"
383 from_work_dir="output_MethPerChr.tsv"
384 label="${tool.name} on ${on_string}: number of hyper/hypo sites">
385 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
386 </data>
387
388 <data name="output_clustering" format="pdf"
389 from_work_dir="output_clustering.pdf"
390 label="${tool.name} on ${on_string}: hierarchical clustering">
391 <filter>input_type['choice'] in ['all', 'clustering']</filter>
392 </data>
393
394 <data name="output_PCA" format="pdf"
395 from_work_dir="output_PCA.pdf"
396 label="${tool.name} on ${on_string}: PCA">
397 <filter>input_type['choice'] in ['all', 'clustering']</filter>
398 </data>
399
400 <data name="output_seg_raw" format="bed"
401 from_work_dir="output_seg_raw.bed"
402 label="${tool.name} on ${on_string}: methylation segment">
403 <filter>input_type['choice'] in ['all', 'segmentation']</filter>
404 </data>
405
406 <data name="output_seg_diff" format="bed"
407 from_work_dir="output_seg_diff.bed"
408 label="${tool.name} on ${on_string}: differential methylation segment">
409 <filter>input_type['choice'] in ['all', 'segmentation']</filter>
410 </data>
411 </outputs>
412 <tests>
413 <test>
414 <repeat name="test_series">
415 <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" />
416 </repeat>
417 <repeat name="test_series">
418 <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" />
419 </repeat>
420 <repeat name="control_series">
421 <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" />
422 </repeat>
423 <repeat name="control_series">
424 <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" />
425 </repeat>
426 <param name="assembly" value="hg18" />
427 <param name="correlation" value="pearson" />
428 <param name="pipeline" value="amp" />
429 <param name="choice" value="all" />
430 <param name="overdispersion" value="none" />
431 <param name="adjust" value="SLIM" />
432 <param name="effect" value="wmean" />
433 <param name="test" value="Chisq" />
434 <param name="qvalue_cutoff" value="0.01" />
435 <param name="meth_cutoff" value="25" />
436 <param name="type" value="all" />
437 <param name="dist" value="correlation" />
438 <param name="method" value="ward" />
439 <output name="output_statistics" file="output_statistics.pdf"
440 ftype="pdf" compare="sim_size"/>
441 <output name="output_correlation" file="output_correlation.pdf"
442 ftype="pdf" compare="sim_size"/>
443 <output name="output_myDiff" file="output_myDiff.bedgraph"
444 ftype="bedgraph"/>
445 <output name="output_MethPerChr" file="output_MethPerChr.tsv"
446 ftype="tabular"/>
447 <output name="output_MethylDiff" file="output_MethylDiff.bedgraph"
448 ftype="bedgraph"/>
449 <output name="output_clustering" file="output_clustering.pdf"
450 ftype="pdf" compare="sim_size"/>
451 <output name="output_PCA" file="output_PCA.pdf"
452 ftype="pdf" compare="sim_size"/>
453 <output name="output_seg_raw" file="output_seg_raw.bed"
454 ftype="bed"/>
455 <output name="output_seg_diff" file="output_seg_diff.bed"
456 ftype="bed"/>
457 </test>
458 </tests>
459 <help>
460 <![CDATA[
461 .. class:: infomark
462
463 **What it does**
464
465 `methylKit`_ is an R package for DNA methylation analysis and annotation
466 from high-throughput bisulfite sequencing.
467 The package is designed to deal with sequencing data from RRBS and
468 its variants, but also target-capture methods such as Agilent SureSelect
469 methyl-seq. In addition, methylKit can deal with base-pair resolution data
470 for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome
471 bisulfite sequencing data if the proper input format is provided.
472
473 .. _methylKit: https://github.com/al2na/methylKit
474
475 The Galaxy tool enables three types of analysis:
476 * differential methylation
477 * clustering
478 * segmentation
479
480 The user can choose to run all provided analyses or a single one.
481
482 .. class:: infomark
483
484 **Input**
485
486 Typically, bisulfite converted reads are aligned to the genome and %
487 methylation value per base is calculated by processing alignments.
488 methylKit takes that % methylation value per base information as input.
489 Such an input file may be obtained from the `AMP`_ pipeline
490 for aligning RRBS reads. A typical input file looks like this::
491
492
493 chrBase chr base strand coverage freqC freqT
494
495 chr21.9764539 chr21 9764539 R 12 25.00 75.00
496
497 chr21.9764513 chr21 9764513 F 12 0.00 100.00
498
499
500 .. _AMP: http://code.google.com/p/amp-errbs/
501
502 .. class:: infomark
503
504 **Output**
505
506 The outputs from differential methylation
507 * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics.
508 * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfies the user defined thresholds with qvalue.cutoff and meth.cutoff.
509 * ``number of hyper/hypo sites``: The tabular file contains number of hyper/hypo methylated regions/bases.
510
511 .. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html
512
513 The outputs from clustering
514 * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data.
515 * ``PCA``: The figure shows principal components analysis of methylation data.
516
517 The outputs from segmentation
518 * ``methylation segment``: The `bed`_ file contains the profile of methylation segment.
519 * ``differential methylation segment``: The bed file contains the profile of differential methylation segment.
520
521 .. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1
522 ]]>
523 </help>
524 <citations>
525 <citation type="doi">10.1186/gb-2012-13-10-r87</citation>
526 </citations>
527 </tool>