Mercurial > repos > rnateam > methylkit
comparison methylkit.xml @ 0:baede5a87a90 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
author | rnateam |
---|---|
date | Wed, 21 Dec 2016 17:26:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:baede5a87a90 |
---|---|
1 <tool id="methylkit" name="methylKit" version="0.99.2"> | |
2 <description>DNA methylation analysis and annotation</description> | |
3 <macros> | |
4 <macro name="differential_methylation"> | |
5 <param name="overdispersion" type="select" | |
6 label="overdispersion" | |
7 help="if set to 'none' (default), no overdispersion | |
8 correction will be attempted. 'MN' applies a scaling | |
9 parameter to variance estimated by the model. | |
10 If set to 'shrinkMN' (experimental parameter), | |
11 scaling parameter will be shrunk towards a common value"> | |
12 <option value="none" selected="True"> | |
13 none | |
14 </option> | |
15 <option value="MN"> | |
16 MN | |
17 </option> | |
18 <option value="shrinkMN"> | |
19 shrinkMN | |
20 </option> | |
21 </param> | |
22 <param name="adjust" type="select" | |
23 label="adjust" | |
24 help="different methods to correct the p-values | |
25 for multiple testing (default: SLIM)."> | |
26 <option value="SLIM" selected="True"> | |
27 SLIM | |
28 </option> | |
29 <option value="holm"> | |
30 holm | |
31 </option> | |
32 <option value="hochberg"> | |
33 hochberg | |
34 </option> | |
35 <option value="hommel"> | |
36 hommel | |
37 </option> | |
38 <option value="bonferroni"> | |
39 bonferroni | |
40 </option> | |
41 <option value="BH"> | |
42 BH | |
43 </option> | |
44 <option value="BY"> | |
45 BY | |
46 </option> | |
47 <option value="fdr"> | |
48 fdr | |
49 </option> | |
50 <option value="none"> | |
51 none | |
52 </option> | |
53 <option value="qvalue"> | |
54 qvalue | |
55 </option> | |
56 </param> | |
57 <param name="effect" type="select" | |
58 label="effect" | |
59 help="method to calculate the mean methylation difference | |
60 between groups using read coverage as weights (default: wmean). | |
61 When set to 'mean', the generic mean is applied and when | |
62 set to 'predicted', predicted means from the logistic | |
63 regression model are used for calculating the effect."> | |
64 <option value="wmean" selected="True"> | |
65 wmean | |
66 </option> | |
67 <option value="mean"> | |
68 mean | |
69 </option> | |
70 <option value="predicted"> | |
71 predicted | |
72 </option> | |
73 </param> | |
74 <param name="test" type="select" | |
75 label="test" | |
76 help="the statistical test used to determine | |
77 the methylation differences (default: Chisq-test). | |
78 The F-test can be chosen | |
79 if overdispersion control is applied."> | |
80 <option value="Chisq" selected="True"> | |
81 Chisq | |
82 </option> | |
83 <option value="F"> | |
84 F | |
85 </option> | |
86 </param> | |
87 <param name="qvalue_cutoff" type="float" | |
88 value="0.01" label="qvalue.cutoff" | |
89 help="cutoff for qvalue of differential methylation statistic (default:0.01)"> | |
90 <validator type="in_range" | |
91 message="Minimum 0 and maximum 1" min="0" max="1"/> | |
92 </param> | |
93 <param name="meth_cutoff" type="float" | |
94 value="25" label="meth.cutoff" | |
95 help="cutoff for absolute value of methylation percentage change between test and control (default:25)"> | |
96 <validator type="in_range" | |
97 message="Minimum 0 and maximum 100" min="0" max="100"/> | |
98 </param> | |
99 <param name="type" type="select" | |
100 label="type" | |
101 help="For retrieving | |
102 hyper-methylated regions/bases type='hyper', | |
103 for hypo-methylated type='hypo' (default:'all')"> | |
104 <option value="all" selected="True"> | |
105 all | |
106 </option> | |
107 <option value="hyper"> | |
108 hyper | |
109 </option> | |
110 <option value="hypo"> | |
111 hypo | |
112 </option> | |
113 </param> | |
114 </macro> | |
115 <macro name="clustering"> | |
116 <param name="dist" type="select" | |
117 label="dist" | |
118 help="the distance measure to be used. | |
119 (default: correlation)"> | |
120 <option value="correlation" selected="True"> | |
121 correlation | |
122 </option> | |
123 <option value="euclidean"> | |
124 euclidean | |
125 </option> | |
126 <option value="maximum"> | |
127 maximum | |
128 </option> | |
129 <option value="manhattan"> | |
130 manhattan | |
131 </option> | |
132 <option value="canberra"> | |
133 canberra | |
134 </option> | |
135 <option value="binary"> | |
136 binary | |
137 </option> | |
138 <option value="minkowski"> | |
139 minkowski | |
140 </option> | |
141 </param> | |
142 <param name="method" type="select" | |
143 label="method" | |
144 help="the agglomeration method to be used. | |
145 (default: ward)"> | |
146 <option value="ward" selected="True"> | |
147 ward | |
148 </option> | |
149 <option value="single"> | |
150 single | |
151 </option> | |
152 <option value="complete"> | |
153 complete | |
154 </option> | |
155 <option value="average"> | |
156 average | |
157 </option> | |
158 <option value="mcquitty"> | |
159 mcquitty | |
160 </option> | |
161 <option value="median"> | |
162 median | |
163 </option> | |
164 <option value="centroid"> | |
165 centroid | |
166 </option> | |
167 </param> | |
168 </macro> | |
169 </macros> | |
170 <requirements> | |
171 <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement> | |
172 </requirements> | |
173 <!-- Fail the job when R aborts or reports an error on stdout or stderr. --> <stdio> | |
174 <regex match="Execution halted" | |
175 source="both" | |
176 level="fatal" | |
177 description="Execution halted." /> | |
178 <regex match="Input-Error 01" | |
179 source="both" | |
180 level="fatal" | |
181 description="Error in your input parameters: Make sure you only apply factors to selected samples." /> | |
182 <regex match="Error in" | |
183 source="both" | |
184 level="fatal" | |
185 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
186 </stdio> | |
187 <!-- Run the generated R script; the path is quoted so the shell treats it as one argument. --> <command> | |
188 <![CDATA[ | |
189 Rscript '$script_file' | |
190 ]]> | |
191 </command> | |
192 <configfiles> | |
193 <configfile name="script_file"> | |
194 library("methylKit") | |
195 | |
196 test_files = list() | |
197 control_files = list() | |
198 | |
199 test_ids = list() | |
200 control_ids = list() | |
201 | |
202 #for $i, $s in enumerate( $test_series ) | |
203 test_ids[${i}+1] = paste("test ", ${i}+1, sep="") | |
204 test_files[${i}+1] = "${s.input.file_name}" | |
205 #end for | |
206 | |
207 #for $i, $s in enumerate( $control_series ) | |
208 control_ids[${i}+1] = paste("control ", ${i}+1, sep="") | |
209 control_files[${i}+1] = "${s.input.file_name}" | |
210 #end for | |
211 | |
212 input_files = append(test_files, control_files) | |
213 sample_ids = append(test_ids, control_ids) | |
214 treatment_tag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids))) | |
215 | |
216 myobj=methRead(input_files, sample.id=sample_ids, assembly="${assembly}", | |
217 pipeline="${pipeline}", treatment=treatment_tag) | |
218 | |
219 pdf('output_statistics.pdf') | |
220 for (obj in myobj){ | |
221 getMethylationStats(obj,plot=TRUE,both.strands=FALSE) | |
222 getCoverageStats(obj,plot=TRUE,both.strands=FALSE) | |
223 } | |
224 devname = dev.off() | |
225 | |
226 ## unite function | |
227 methidh=unite(myobj) | |
228 | |
229 pdf("output_correlation.pdf") | |
230 getCorrelation(object = methidh, plot=TRUE, method = "${correlation}") | |
231 devname = dev.off() | |
232 | |
233 #if $input_type.choice in ["all", "differential_methylation"]: | |
234 ## The last two arguments (slim, weighted.mean) have | |
235 ## redundant counterparts in adjust and effect, so they | |
236 ## are turned off here to avoid a possible conflict. | |
237 myDiff = calculateDiffMeth(methidh, overdispersion="${input_type.overdispersion}", | |
238 adjust="${input_type.adjust}", effect="${input_type.effect}", test="${input_type.test}", | |
239 slim=FALSE, weighted.mean=FALSE) | |
240 | |
241 bedgraph(myDiff, file.name="output_myDiff.bedgraph", col.name="meth.diff", | |
242 unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="") | |
243 | |
244 MethPerChr = diffMethPerChr(myDiff, plot=FALSE, | |
245 qvalue.cutoff=${input_type.qvalue_cutoff}, | |
246 meth.cutoff=${input_type.meth_cutoff}) | |
247 write.table(MethPerChr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv") | |
248 | |
249 MethylDiff = getMethylDiff(myDiff, difference=${input_type.meth_cutoff}, | |
250 qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}") | |
251 bedgraph(MethylDiff, file.name="output_MethylDiff.bedgraph", col.name="meth.diff", | |
252 unmeth=FALSE,log.transform=FALSE,negative=FALSE,add.on="") | |
253 #end if | |
254 | |
255 #if $input_type.choice in ["all", "clustering"]: | |
256 pdf( "output_clustering.pdf" ) | |
257 methClust = clusterSamples(methidh, dist="${input_type.dist}", method="${input_type.method}") | |
258 devname = dev.off() | |
259 | |
260 pdf( "output_PCA.pdf" ) | |
261 PCASamples(methidh) | |
262 devname = dev.off() | |
263 #end if | |
264 | |
265 #if $input_type.choice in ["all", "segmentation"]: | |
266 ## methSeg works on methylRaw or methylDiff objects with region resolution, | |
267 ## so the first sample and the united methylBase are tiled beforehand | |
268 tileRaw = tileMethylCounts(myobj[[1]]) | |
269 tileBase = tileMethylCounts(methidh) | |
270 tileDiff = calculateDiffMeth(tileBase) | |
271 | |
272 ## methSeg generates GRanges | |
273 segRaw = methSeg(tileRaw, diagnostic.plot = FALSE) | |
274 segDiff = methSeg(tileDiff, diagnostic.plot = FALSE) | |
275 | |
276 ## which can then be exported as BED | |
277 methSeg2bed(segments = segRaw, filename = "output_seg_raw.bed") | |
278 methSeg2bed(segments = segDiff, filename = "output_seg_diff.bed") | |
279 #end if | |
280 </configfile> | |
281 </configfiles> | |
282 <inputs> | |
283 <repeat name="test_series" title="Test samples" min="1"> | |
284 <param name="input" type="data" format="tabular" label="Add a file" | |
285 help="Such input file may be obtained from AMP pipeline for aligning RRBS reads."> | |
286 <validator type="unspecified_build" /> | |
287 </param> | |
288 </repeat> | |
289 <repeat name="control_series" title="Control samples" min="1"> | |
290 <param name="input" type="data" format="tabular" label="Add a file" | |
291 help="Such input file may be obtained from AMP pipeline for aligning RRBS reads." > | |
292 <validator type="unspecified_build" /> | |
293 </param> | |
294 </repeat> | |
295 <param name="assembly" type="text" | |
296 value="hg18" label="assembly" | |
297 help="A string that defines the genome assembly such as | |
298 hg18, mm9 (default: hg18)."> | |
299 </param> | |
300 <param name="correlation" type="select" | |
301 label="correlation" | |
302 help="correlation method (default: pearson)"> | |
303 <option value="pearson" selected="True"> | |
304 pearson | |
305 </option> | |
306 <option value="kendall"> | |
307 kendall | |
308 </option> | |
309 <option value="spearman"> | |
310 spearman | |
311 </option> | |
312 </param> | |
313 <param name="pipeline" type="select" | |
314 label="pipeline" | |
315 help="name of the alignment pipeline (default: amp)"> | |
316 <option value="amp" selected="True"> | |
317 amp | |
318 </option> | |
319 <option value="bismark"> | |
320 bismark | |
321 </option> | |
322 <option value="bismarkCoverage"> | |
323 bismarkCoverage | |
324 </option> | |
325 <option value="bismarkCytosineReport"> | |
326 bismarkCytosineReport | |
327 </option> | |
328 </param> | |
329 <conditional name="input_type"> | |
330 <param name="choice" type="select" | |
331 label="analysis to carry out:" | |
332 help="The analysis you wish to carry out."> | |
333 <option value="all" selected="True"> | |
334 All provided analysis | |
335 </option> | |
336 <option value="differential_methylation"> | |
337 Differential methylation | |
338 </option> | |
339 <option value="clustering"> | |
340 Clustering | |
341 </option> | |
342 <option value="segmentation"> | |
343 Segmentation | |
344 </option> | |
345 </param> | |
346 <when value="all"> | |
347 <expand macro="differential_methylation" /> | |
348 <expand macro="clustering" /> | |
349 </when> | |
350 <when value="differential_methylation"> | |
351 <expand macro="differential_methylation" /> | |
352 </when> | |
353 <when value="clustering"> | |
354 <expand macro="clustering" /> | |
355 </when> | |
356 <when value="segmentation" /> | |
357 </conditional> | |
358 </inputs> | |
359 <outputs> | |
360 <data name="output_statistics" format="pdf" | |
361 from_work_dir="output_statistics.pdf" | |
362 label="${tool.name} on ${on_string}: CpG statistics"> | |
363 </data> | |
364 | |
365 <data name="output_correlation" format="pdf" | |
366 from_work_dir="output_correlation.pdf" | |
367 label="${tool.name} on ${on_string}: correlation between samples"> | |
368 </data> | |
369 | |
370 <data name="output_myDiff" format="bedgraph" | |
371 from_work_dir="output_myDiff.bedgraph" | |
372 label="${tool.name} on ${on_string}: differential methylation"> | |
373 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
374 </data> | |
375 | |
376 <data name="output_MethylDiff" format="bedgraph" | |
377 from_work_dir="output_MethylDiff.bedgraph" | |
378 label="${tool.name} on ${on_string}: differential methylation - subset"> | |
379 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
380 </data> | |
381 | |
382 <data name="output_MethPerChr" format="tabular" | |
383 from_work_dir="output_MethPerChr.tsv" | |
384 label="${tool.name} on ${on_string}: number of hyper/hypo sites"> | |
385 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
386 </data> | |
387 | |
388 <data name="output_clustering" format="pdf" | |
389 from_work_dir="output_clustering.pdf" | |
390 label="${tool.name} on ${on_string}: hierarchical clustering"> | |
391 <filter>input_type['choice'] in ['all', 'clustering']</filter> | |
392 </data> | |
393 | |
394 <data name="output_PCA" format="pdf" | |
395 from_work_dir="output_PCA.pdf" | |
396 label="${tool.name} on ${on_string}: PCA"> | |
397 <filter>input_type['choice'] in ['all', 'clustering']</filter> | |
398 </data> | |
399 | |
400 <data name="output_seg_raw" format="bed" | |
401 from_work_dir="output_seg_raw.bed" | |
402 label="${tool.name} on ${on_string}: methylation segment"> | |
403 <filter>input_type['choice'] in ['all', 'segmentation']</filter> | |
404 </data> | |
405 | |
406 <data name="output_seg_diff" format="bed" | |
407 from_work_dir="output_seg_diff.bed" | |
408 label="${tool.name} on ${on_string}: differential methylation segment"> | |
409 <filter>input_type['choice'] in ['all', 'segmentation']</filter> | |
410 </data> | |
411 </outputs> | |
412 <tests> | |
413 <test> | |
414 <repeat name="test_series"> | |
415 <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
416 </repeat> | |
417 <repeat name="test_series"> | |
418 <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
419 </repeat> | |
420 <repeat name="control_series"> | |
421 <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
422 </repeat> | |
423 <repeat name="control_series"> | |
424 <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
425 </repeat> | |
426 <param name="assembly" value="hg18" /> | |
427 <param name="correlation" value="pearson" /> | |
428 <param name="pipeline" value="amp" /> | |
429 <param name="choice" value="all" /> | |
430 <param name="overdispersion" value="none" /> | |
431 <param name="adjust" value="SLIM" /> | |
432 <param name="effect" value="wmean" /> | |
433 <param name="test" value="Chisq" /> | |
434 <param name="qvalue_cutoff" value="0.01" /> | |
435 <param name="meth_cutoff" value="25" /> | |
436 <param name="type" value="all" /> | |
437 <param name="dist" value="correlation" /> | |
438 <param name="method" value="ward" /> | |
439 <output name="output_statistics" file="output_statistics.pdf" | |
440 ftype="pdf" compare="sim_size"/> | |
441 <output name="output_correlation" file="output_correlation.pdf" | |
442 ftype="pdf" compare="sim_size"/> | |
443 <output name="output_myDiff" file="output_myDiff.bedgraph" | |
444 ftype="bedgraph"/> | |
445 <output name="output_MethPerChr" file="output_MethPerChr.tsv" | |
446 ftype="tabular"/> | |
447 <output name="output_MethylDiff" file="output_MethylDiff.bedgraph" | |
448 ftype="bedgraph"/> | |
449 <output name="output_clustering" file="output_clustering.pdf" | |
450 ftype="pdf" compare="sim_size"/> | |
451 <output name="output_PCA" file="output_PCA.pdf" | |
452 ftype="pdf" compare="sim_size"/> | |
453 <output name="output_seg_raw" file="output_seg_raw.bed" | |
454 ftype="bed"/> | |
455 <output name="output_seg_diff" file="output_seg_diff.bed" | |
456 ftype="bed"/> | |
457 </test> | |
458 </tests> | |
459 <help> | |
460 <![CDATA[ | |
461 .. class:: infomark | |
462 | |
463 **What it does** | |
464 | |
465 `methylKit`_ is an R package for DNA methylation analysis and annotation | |
466 from high-throughput bisulfite sequencing. | |
467 The package is designed to deal with sequencing data from RRBS and | |
468 its variants, but also target-capture methods such as Agilent SureSelect | |
469 methyl-seq. In addition, methylKit can deal with base-pair resolution data | |
470 for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome | |
471 bisulfite sequencing data if a proper input format is provided. | |
472 | |
473 .. _methylKit: https://github.com/al2na/methylKit | |
474 | |
475 The Galaxy tool enables three types of analysis: | |
476 * differential methylation | |
477 * clustering | |
478 * segmentation | |
479 | |
480 The user can choose to run all provided analyses or run an individual one. | |
481 | |
482 .. class:: infomark | |
483 | |
484 **Input** | |
485 | |
486 Typically, bisulfite converted reads are aligned to the genome and % | |
487 methylation value per base is calculated by processing alignments. | |
488 methylKit takes that % methylation value per base information as input. | |
489 Such an input file may be obtained from the `AMP`_ pipeline | |
490 for aligning RRBS reads. A typical input file looks like this:: | |
491 | |
492 | |
493 chrBase chr base strand coverage freqC freqT | |
494 | |
495 chr21.9764539 chr21 9764539 R 12 25.00 75.00 | |
496 | |
497 chr21.9764513 chr21 9764513 F 12 0.00 100.00 | |
498 | |
499 | |
500 .. _AMP: http://code.google.com/p/amp-errbs/ | |
501 | |
502 .. class:: infomark | |
503 | |
504 **Output** | |
505 | |
506 The outputs from differential methylation | |
507 * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics. | |
508 * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfies the user-defined thresholds qvalue.cutoff and meth.cutoff. | |
509 * ``number of hyper/hypo sites``: The tabular file contains the number of hyper/hypo methylated regions/bases. | |
510 | |
511 .. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html | |
512 | |
513 The outputs from clustering | |
514 * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data. | |
515 * ``PCA``: The figure shows principal components analysis of methylation data. | |
516 | |
517 The output from segmentation | |
518 * ``methylation segment``: The `bed`_ file contains the profile of methylation segments. | |
519 * ``differential methylation segment``: The bed file contains the profile of differential methylation segments. | |
520 | |
521 .. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1 | |
522 ]]> | |
523 </help> | |
524 <citations> | |
525 <citation type="doi">10.1186/gb-2012-13-10-r87</citation> | |
526 </citations> | |
527 </tool> |