Mercurial > repos > rnateam > methylkit
diff methylkit.xml @ 0:baede5a87a90 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
author | rnateam |
---|---|
date | Wed, 21 Dec 2016 17:26:25 -0500 |
parents | |
children |
line wrap: on
line diff
<tool id="methylkit" name="methylKit" version="0.99.2">
    <description>DNA methylation analysis and annotation</description>
    <macros>
        <!-- Parameters for the differential methylation analysis; expanded in the
             "all" and "differential_methylation" branches of the input_type conditional. -->
        <macro name="differential_methylation">
            <param name="overdispersion" type="select"
                   label="overdispersion"
                   help="if set to 'none' (default), no overdispersion correction will be attempted. 'MN' applies a scaling parameter to variance estimated by the model. If set to 'shrinkMN' (experimental parameter), scaling parameter will be shrunk towards a common value">
                <option value="none" selected="True">none</option>
                <option value="MN">MN</option>
                <option value="shrinkMN">shrinkMN</option>
            </param>
            <param name="adjust" type="select"
                   label="adjust"
                   help="different methods to correct the p-values for multiple testing (default: SLIM).">
                <option value="SLIM" selected="True">SLIM</option>
                <option value="holm">holm</option>
                <option value="hochberg">hochberg</option>
                <option value="hommel">hommel</option>
                <option value="bonferroni">bonferroni</option>
                <option value="BH">BH</option>
                <option value="BY">BY</option>
                <option value="fdr">fdr</option>
                <option value="none">none</option>
                <option value="qvalue">qvalue</option>
            </param>
            <param name="effect" type="select"
                   label="effect"
                   help="method to calculate the mean methylation difference between groups using read coverage as weights (default: wmean). When set to 'mean', the generic mean is applied and when set to 'predicted', predicted means from the logistic regression model are used for calculating the effect.">
                <option value="wmean" selected="True">wmean</option>
                <option value="mean">mean</option>
                <option value="predicted">predicted</option>
            </param>
            <param name="test" type="select"
                   label="test"
                   help="the statistical test used to determine the methylation differences (default: Chisq-test). The F-test can be chosen if overdispersion control is applied.">
                <option value="Chisq" selected="True">Chisq</option>
                <option value="F">F</option>
            </param>
            <param name="qvalue_cutoff" type="float"
                   value="0.01" label="qvalue.cutoff"
                   help="cutoff for qvalue of differential methylation statistic (default:0.01)">
                <validator type="in_range"
                           message="Minimum 0 and maximum 1" min="0" max="1"/>
            </param>
            <param name="meth_cutoff" type="float"
                   value="25" label="meth.cutoff"
                   help="cutoff for absolute value of methylation percentage change between test and control (default:25)">
                <validator type="in_range"
                           message="Minimum 0 and maximum 100" min="0" max="100"/>
            </param>
            <param name="type" type="select"
                   label="type"
                   help="For retrieving hyper-methylated regions/bases type='hyper', for hypo-methylated type='hypo' (default:'all')">
                <option value="all" selected="True">all</option>
                <option value="hyper">hyper</option>
                <option value="hypo">hypo</option>
            </param>
        </macro>
        <!-- Parameters for sample clustering; expanded in the "all" and
             "clustering" branches of the input_type conditional. -->
        <macro name="clustering">
            <param name="dist" type="select"
                   label="dist"
                   help="the distance measure to be used. (default: correlation)">
                <option value="correlation" selected="True">correlation</option>
                <option value="euclidean">euclidean</option>
                <option value="maximum">maximum</option>
                <option value="manhattan">manhattan</option>
                <option value="canberra">canberra</option>
                <option value="binary">binary</option>
                <option value="minkowski">minkowski</option>
            </param>
            <param name="method" type="select"
                   label="method"
                   help="the agglomeration method to be used. (default: ward)">
                <option value="ward" selected="True">ward</option>
                <option value="single">single</option>
                <option value="complete">complete</option>
                <option value="average">average</option>
                <option value="mcquitty">mcquitty</option>
                <option value="median">median</option>
                <option value="centroid">centroid</option>
            </param>
        </macro>
    </macros>
    <requirements>
        <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement>
    </requirements>
    <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Input-Error 01"
               source="both"
               level="fatal"
               description="Error in your input parameters: Make sure you only apply factors to selected samples." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- The script path is quoted so that a path containing spaces or shell
         metacharacters is still passed to Rscript as a single argument. -->
    <command>
<![CDATA[
        Rscript '$script_file'
]]>
    </command>
    <configfiles>
        <configfile name="script_file">
            library("methylKit")

            test_files = list()
            control_files = list()

            test_ids = list()
            control_ids = list()

            ## collect the test samples; R lists are 1-based, hence the +1
            #for $i, $s in enumerate( $test_series )
                test_ids[${i}+1] = paste("test ", ${i}+1, sep="")
                test_files[${i}+1] = "${s.input.file_name}"
            #end for

            ## collect the control samples
            #for $i, $s in enumerate( $control_series )
                control_ids[${i}+1] = paste("control ", ${i}+1, sep="")
                control_files[${i}+1] = "${s.input.file_name}"
            #end for

            ## test samples come first and are tagged 1, controls are tagged 0
            input_files = append(test_files, control_files)
            sample_ids = append(test_ids, control_ids)
            treatment_tag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids)))

            myobj=methRead(input_files, sample.id=sample_ids, assembly="${assembly}",
                           pipeline="${pipeline}", treatment=treatment_tag)

            ## per-sample methylation and coverage plots, all written into one PDF
            pdf('output_statistics.pdf')
            for (obj in myobj){
                getMethylationStats(obj,plot=TRUE,both.strands=FALSE)
                getCoverageStats(obj,plot=TRUE,both.strands=FALSE)
            }
            devname = dev.off()

            ## unite function
            methidh=unite(myobj)

            pdf("output_correlation.pdf")
            getCorrelation(object = methidh, plot=TRUE, method = "${correlation}")
            devname = dev.off()

            #if $input_type.choice in ["all", "differential_methylation"]:
                ## the last two arguments slim, weighted.mean
                ## have the redundant counterparts in effect and adjust,
                ## so turning them off to avoid the possible conflict.
                myDiff = calculateDiffMeth(methidh, overdispersion="${input_type.overdispersion}",
                                           adjust="${input_type.adjust}", effect="${input_type.effect}", test="${input_type.test}",
                                           slim=FALSE, weighted.mean=FALSE)

                bedgraph(myDiff, file.name="output_myDiff.bedgraph", col.name="meth.diff",
                         unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="")

                MethPerChr = diffMethPerChr(myDiff, plot=FALSE,
                                            qvalue.cutoff=${input_type.qvalue_cutoff},
                                            meth.cutoff=${input_type.meth_cutoff})
                write.table(MethPerChr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv")

                MethylDiff = getMethylDiff(myDiff, difference=${input_type.meth_cutoff},
                                           qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}")
                bedgraph(MethylDiff, file.name="output_MethylDiff.bedgraph", col.name="meth.diff",
                         unmeth=FALSE,log.transform=FALSE,negative=FALSE,add.on="")
            #end if

            #if $input_type.choice in ["all", "clustering"]:
                pdf( "output_clustering.pdf" )
                methClust = clusterSamples(methidh, dist="${input_type.dist}", method="${input_type.method}")
                devname = dev.off()

                pdf( "output_PCA.pdf" )
                PCASamples(methidh)
                devname = dev.off()
            #end if

            #if $input_type.choice in ["all", "segmentation"]:
                ## methSeg works for methylRaw or methylDiff with resolution region,
                ## so methylBase has to be tiled before
                tileRaw = tileMethylCounts(myobj[[1]])
                tileBase = tileMethylCounts(methidh)
                tileDiff = calculateDiffMeth(tileBase)

                ## methseg generates Granges
                segRaw = methSeg(tileRaw, diagnostic.plot = FALSE)
                segDiff = methSeg(tileDiff, diagnostic.plot = FALSE)

                ## and can be exported as BED
                methSeg2bed(segments = segRaw, filename = "output_seg_raw.bed")
                methSeg2bed(segments = segDiff, filename = "output_seg_diff.bed")
            #end if
        </configfile>
    </configfiles>
    <inputs>
        <!-- At least one test sample and one control sample are required. -->
        <repeat name="test_series" title="Test samples" min="1">
            <param name="input" type="data" format="tabular" label="Add a file"
                   help="Such input file may be obtained from AMP pipeline for aligning RRBS reads.">
                <validator type="unspecified_build" />
            </param>
        </repeat>
        <repeat name="control_series" title="Control samples" min="1">
            <param name="input" type="data" format="tabular" label="Add a file"
                   help="Such input file may be obtained from AMP pipeline for aligning RRBS reads.">
                <validator type="unspecified_build" />
            </param>
        </repeat>
        <param name="assembly" type="text"
               value="hg18" label="assembly"
               help="A string that defines the genome assembly such as hg18, mm9 (default: hg18).">
        </param>
        <param name="correlation" type="select"
               label="correlation"
               help="correlation method (default: pearson)">
            <option value="pearson" selected="True">pearson</option>
            <option value="kendall">kendall</option>
            <option value="spearman">spearman</option>
        </param>
        <param name="pipeline" type="select"
               label="pipeline"
               help="name of the alignment pipeline (default: amp)">
            <option value="amp" selected="True">amp</option>
            <option value="bismark">bismark</option>
            <option value="bismarkCoverage">bismarkCoverage</option>
            <option value="bismarkCytosineReport">bismarkCytosineReport</option>
        </param>
        <!-- Selects which analyses run; each branch exposes only the parameters it needs. -->
        <conditional name="input_type">
            <param name="choice" type="select"
                   label="analysis to carry out:"
                   help="The analysis you wish to carry out.">
                <option value="all" selected="True">All provided analysis</option>
                <option value="differential_methylation">Differential methylation</option>
                <option value="clustering">Clustering</option>
                <option value="segmentation">Segmentation</option>
            </param>
            <when value="all">
                <expand macro="differential_methylation" />
                <expand macro="clustering" />
            </when>
            <when value="differential_methylation">
                <expand macro="differential_methylation" />
            </when>
            <when value="clustering">
                <expand macro="clustering" />
            </when>
            <when value="segmentation" />
        </conditional>
    </inputs>
    <outputs>
        <!-- The first two outputs are always produced; the rest are filtered
             on the analysis selected in the input_type conditional. -->
        <data name="output_statistics" format="pdf"
              from_work_dir="output_statistics.pdf"
              label="${tool.name} on ${on_string}: CpG statistics">
        </data>

        <data name="output_correlation" format="pdf"
              from_work_dir="output_correlation.pdf"
              label="${tool.name} on ${on_string}: correlation between samples">
        </data>

        <data name="output_myDiff" format="bedgraph"
              from_work_dir="output_myDiff.bedgraph"
              label="${tool.name} on ${on_string}: differential methylation">
            <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
        </data>

        <data name="output_MethylDiff" format="bedgraph"
              from_work_dir="output_MethylDiff.bedgraph"
              label="${tool.name} on ${on_string}: differential methylation - subset">
            <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
        </data>

        <data name="output_MethPerChr" format="tabular"
              from_work_dir="output_MethPerChr.tsv"
              label="${tool.name} on ${on_string}: number of hyper/hypo sites">
            <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
        </data>

        <data name="output_clustering" format="pdf"
              from_work_dir="output_clustering.pdf"
              label="${tool.name} on ${on_string}: hierarchical clustering">
            <filter>input_type['choice'] in ['all', 'clustering']</filter>
        </data>

        <data name="output_PCA" format="pdf"
              from_work_dir="output_PCA.pdf"
              label="${tool.name} on ${on_string}: PCA">
            <filter>input_type['choice'] in ['all', 'clustering']</filter>
        </data>

        <data name="output_seg_raw" format="bed"
              from_work_dir="output_seg_raw.bed"
              label="${tool.name} on ${on_string}: methylation segment">
            <filter>input_type['choice'] in ['all', 'segmentation']</filter>
        </data>

        <data name="output_seg_diff" format="bed"
              from_work_dir="output_seg_diff.bed"
              label="${tool.name} on ${on_string}: differential methylation segment">
            <filter>input_type['choice'] in ['all', 'segmentation']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Runs every analysis on two test and two control samples. -->
        <test>
            <repeat name="test_series">
                <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" />
            </repeat>
            <repeat name="test_series">
                <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" />
            </repeat>
            <repeat name="control_series">
                <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" />
            </repeat>
            <repeat name="control_series">
                <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" />
            </repeat>
            <param name="assembly" value="hg18" />
            <param name="correlation" value="pearson" />
            <param name="pipeline" value="amp" />
            <param name="choice" value="all" />
            <param name="overdispersion" value="none" />
            <param name="adjust" value="SLIM" />
            <param name="effect" value="wmean" />
            <param name="test" value="Chisq" />
            <param name="qvalue_cutoff" value="0.01" />
            <param name="meth_cutoff" value="25" />
            <param name="type" value="all" />
            <param name="dist" value="correlation" />
            <param name="method" value="ward" />
            <output name="output_statistics" file="output_statistics.pdf"
                    ftype="pdf" compare="sim_size"/>
            <output name="output_correlation" file="output_correlation.pdf"
                    ftype="pdf" compare="sim_size"/>
            <output name="output_myDiff" file="output_myDiff.bedgraph"
                    ftype="bedgraph"/>
            <output name="output_MethPerChr" file="output_MethPerChr.tsv"
                    ftype="tabular"/>
            <output name="output_MethylDiff" file="output_MethylDiff.bedgraph"
                    ftype="bedgraph"/>
            <output name="output_clustering" file="output_clustering.pdf"
                    ftype="pdf" compare="sim_size"/>
            <output name="output_PCA" file="output_PCA.pdf"
                    ftype="pdf" compare="sim_size"/>
            <output name="output_seg_raw" file="output_seg_raw.bed"
                    ftype="bed"/>
            <output name="output_seg_diff" file="output_seg_diff.bed"
                    ftype="bed"/>
        </test>
    </tests>
<help>
<![CDATA[
.. class:: infomark

**What it does**

`methylKit`_ is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing.
The package is designed to deal with sequencing data from RRBS and
its variants, but also target-capture methods such as Agilent SureSelect
methyl-seq. In addition, methylKit can deal with base-pair resolution data
for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome
bisulfite sequencing data if proper input format is provided.

.. _methylKit: https://github.com/al2na/methylKit

The Galaxy tool enables three types of analysis:
    * differential methylation
    * clustering
    * segmentation

The user can choose to run all provided analysis or run an individual one.

.. class:: infomark

**Input**

Typically, bisulfite converted reads are aligned to the genome and %
methylation value per base is calculated by processing alignments.
methylKit takes that % methylation value per base information as input.
Such input file may be obtained from `AMP`_ pipeline
for aligning RRBS reads. A typical input file looks like this::


    chrBase          chr      base      strand   coverage   freqC    freqT

    chr21.9764539    chr21    9764539   R        12         25.00    75.00

    chr21.9764513    chr21    9764513   F        12         0.00     100.00


.. _AMP: http://code.google.com/p/amp-errbs/

.. class:: infomark

**Output**

The outputs from differential methylation
    * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics.
    * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfies the user defined thresholds with qvalue.cutoff and meth.cutoff.
    * ``number of hyper/hypo sites``: The tabular file contains number of hyper/hypo methylated regions/bases.

.. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html

The outputs from clustering
    * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data.
    * ``PCA``: The figure shows principal components analysis of methylation data.

The output from segmentation
    * ``methylation segment``: The `bed`_ file contains the profile of methylation segment.
    * ``differential methylation segment``: The bed file contains the profile of differential methylation segment.

.. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1
]]>
</help>
<citations>
    <citation type="doi">10.1186/gb-2012-13-10-r87</citation>
</citations>
</tool>