diff methylkit.xml @ 0:baede5a87a90 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
author rnateam
date Wed, 21 Dec 2016 17:26:25 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/methylkit.xml	Wed Dec 21 17:26:25 2016 -0500
@@ -0,0 +1,527 @@
+<tool id="methylkit" name="methylKit" version="0.99.2">
+  <description>DNA methylation analysis and annotation</description>
+  <macros>
+        <macro name="differential_methylation">
+          <param name="overdispersion" type="select"
+              label="overdispersion"
+              help="if set to 'none' (default), no overdispersion
+              correction will be attempted. 'MN' applies a scaling
+              parameter to variance estimated by the model.
+              If set to 'shrinkMN' (experimental parameter),
+              scaling parameter will be shrunk towards a common value">
+              <option value="none" selected="True">
+                  none
+              </option>
+              <option value="MN">
+                  MN
+              </option>
+              <option value="shrinkMN">
+                  shrinkMN
+              </option>
+          </param>
+          <param name="adjust" type="select"
+              label="adjust"
+              help="different methods to correct the p-values
+              for multiple testing (default: SLIM).">
+              <option value="SLIM" selected="True">
+                  SLIM
+              </option>
+              <option value="holm">
+                  holm
+              </option>
+              <option value="hochberg">
+                  hochberg
+              </option>
+              <option value="hommel">
+                  hommel
+              </option>
+              <option value="bonferroni">
+                  bonferroni
+              </option>
+              <option value="BH">
+                  BH
+              </option>
+              <option value="BY">
+                  BY
+              </option>
+              <option value="fdr">
+                  fdr
+              </option>
+              <option value="none">
+                  none
+              </option>
+              <option value="qvalue">
+                  qvalue
+              </option>
+          </param>
+          <param name="effect" type="select"
+              label="effect"
+              help="method to calculate the mean methylation different
+              between groups using read coverage as weights (default: wmean).
+              When set to 'mean', the generic mean is applied and when
+              set to 'predicted', predicted means from the logistic
+              regression model is used for calculating the effect.">
+              <option value="wmean" selected="True">
+                  wmean
+              </option>
+              <option value="mean">
+                  mean
+              </option>
+              <option value="predicted">
+                  predicted
+              </option>
+          </param>
+          <param name="test" type="select"
+              label="test"
+              help="the statistical test used to determine
+              the methylation differences (default: Chisq-test).
+              The F-test can be chosen
+              if overdispersion control is applied.">
+              <option value="Chisq" selected="True">
+                  Chisq
+              </option>
+              <option value="F">
+                  F
+              </option>
+          </param>
+          <param name="qvalue_cutoff" type="float"
+              value="0.01" label="qvalue.cutoff"
+              help="cutoff for qvalue of differential methylation statistic (default:0.01)">
+              <validator type="in_range"
+                  message="Minimum 0 and maximum 1" min="0" max="1"/>
+          </param>
+          <param name="meth_cutoff" type="float"
+              value="25" label="meth.cutoff"
+              help="cutoff for absolute value of methylation percentage change between test and control (default:25)">
+              <validator type="in_range"
+                  message="Minimum 0 and maximum 100" min="0" max="100"/>
+          </param>
+          <param name="type" type="select"
+              label="type"
+              help="For retrieving
+              hyper-methylated regions/bases type='hyper',
+              for hypo-methylated type='hypo' (default:'all')">
+              <option value="all" selected="True">
+                  all
+              </option>
+              <option value="hyper">
+                  hyper
+              </option>
+              <option value="hypo">
+                  hypo
+              </option>
+          </param>
+        </macro>
+        <macro name="clustering">
+          <param name="dist" type="select"
+              label="dist"
+              help="the distance measure to be used.
+              (default: correlation)">
+              <option value="correlation" selected="True">
+                  correlation
+              </option>
+              <option value="euclidean">
+                  euclidean
+              </option>
+              <option value="maximum">
+                  maximum
+              </option>
+              <option value="manhattan">
+                  manhattan
+              </option>
+              <option value="canberra">
+                  canberra
+              </option>
+              <option value="binary">
+                  binary
+              </option>
+              <option value="minkowski">
+                  minkowski
+              </option>
+          </param>
+          <param name="method" type="select"
+              label="method"
+              help="the agglomeration method to be used.
+              (default: ward)">
+              <option value="ward" selected="True">
+                  ward
+              </option>
+              <option value="single">
+                  single
+              </option>
+              <option value="complete">
+                  complete
+              </option>
+              <option value="average">
+                  average
+              </option>
+              <option value="mcquitty">
+                  mcquitty
+              </option>
+              <option value="median">
+                  median
+              </option>
+              <option value="centroid">
+                  centroid
+              </option>
+          </param>
+        </macro>
+  </macros>
+  <requirements>
+    <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement>
+  </requirements>
+  <stdio>
+        <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+        <regex match="Input-Error 01"
+           source="both"
+           level="fatal"
+           description="Error in your input parameters: Make sure you only apply factors to selected samples." />
+        <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An undefined error occured, please check your intput carefully and contact your administrator." />
+  </stdio>
+  <command>
+<![CDATA[
+      Rscript $script_file
+]]>
+  </command>
+  <configfiles>
+    <configfile name="script_file">
+        library("methylKit")
+
+        test_files = list()
+        control_files = list()
+
+        test_ids = list()
+        control_ids = list()
+
+        #for $i, $s in enumerate( $test_series )
+          test_ids[${i}+1] = paste("test ", ${i}+1, sep="")
+          test_files[${i}+1] = "${s.input.file_name}"
+        #end for
+
+        #for $i, $s in enumerate( $control_series )
+          control_ids[${i}+1] = paste("control ", ${i}+1, sep="")
+          control_files[${i}+1] = "${s.input.file_name}"
+        #end for
+
+        input_files = append(test_files, control_files)
+        sample_ids = append(test_ids, control_ids)
+        treatment_tag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids)))
+
+        myobj=methRead(input_files, sample.id=sample_ids, assembly="${assembly}",
+                    pipeline="${pipeline}", treatment=treatment_tag)
+
+        pdf('output_statistics.pdf')
+        for (obj in myobj){
+          getMethylationStats(obj,plot=TRUE,both.strands=FALSE)
+          getCoverageStats(obj,plot=TRUE,both.strands=FALSE)
+        }
+        devname = dev.off()
+
+        ## unite function
+        methidh=unite(myobj)
+
+        pdf("output_correlation.pdf")
+        getCorrelation(object = methidh, plot=TRUE, method = "${correlation}")
+        devname = dev.off()
+
+        #if $input_type.choice in ["all", "differential_methylation"]:
+            ## the last two arguments slim, weighted.mean
+            ## have the redundant counterparts in effect and adjust,
+            ## so turning them off to avoide the possible conflict.
+            myDiff = calculateDiffMeth(methidh, overdispersion="${input_type.overdispersion}",
+            adjust="${input_type.adjust}", effect="${input_type.effect}", test="${input_type.test}",
+            slim=FALSE, weighted.mean=FALSE)
+
+            bedgraph(myDiff, file.name="output_myDiff.bedgraph", col.name="meth.diff",
+                     unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="")
+
+            MethPerChr = diffMethPerChr(myDiff, plot=FALSE,
+                                        qvalue.cutoff=${input_type.qvalue_cutoff},
+                                        meth.cutoff=${input_type.meth_cutoff})
+            write.table(MethPerChr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv")
+
+            MethylDiff = getMethylDiff(myDiff, difference=${input_type.meth_cutoff},
+                                        qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}")
+            bedgraph(MethylDiff, file.name="output_MethylDiff.bedgraph", col.name="meth.diff",
+                     unmeth=FALSE,log.transform=FALSE,negative=FALSE,add.on="")
+        #end if
+
+        #if $input_type.choice in ["all", "clustering"]:
+            pdf( "output_clustering.pdf" )
+            methClust = clusterSamples(methidh, dist="${input_type.dist}", method="${input_type.method}")
+            devname = dev.off()
+
+            pdf( "output_PCA.pdf" )
+            PCASamples(methidh)
+            devname = dev.off()
+        #end if
+
+        #if $input_type.choice in ["all", "segmentation"]:
+            ## methSeg works for methylRaw or methylDiff with resolution region,
+            ## so methylBase has to be tiled before
+            tileRaw = tileMethylCounts(myobj[[1]])
+            tileBase = tileMethylCounts(methidh)
+            tileDiff = calculateDiffMeth(tileBase)
+
+            ## methseg generates Granges
+            segRaw = methSeg(tileRaw, diagnostic.plot = FALSE)
+            segDiff = methSeg(tileDiff, diagnostic.plot = FALSE)
+
+            ## and can be exported as BED
+            methSeg2bed(segments = segRaw, filename = "output_seg_raw.bed")
+            methSeg2bed(segments = segDiff, filename = "output_seg_diff.bed")
+        #end if
+    </configfile>
+  </configfiles>
+  <inputs>
+    <repeat name="test_series" title="Test samples" min="1">
+                <param name="input" type="data" format="tabular" label="Add a file"
+                    help="Such input file may be obtained from AMP pipeline for aligning RRBS reads.">
+                    <validator type="unspecified_build" />
+                </param>
+    </repeat>
+    <repeat name="control_series" title="Control samples" min="1">
+                <param name="input" type="data" format="tabular" label="Add a file"
+                    help="Such input file may be obtained from AMP pipeline for aligning RRBS reads." >
+                    <validator type="unspecified_build" />
+                </param>
+    </repeat>
+    <param name="assembly" type="text"
+        value="hg18" label="assembly"
+        help="A string that defines the genome assembly such as
+        hg18, mm9 (default: hg18).">
+    </param>
+    <param name="correlation" type="select"
+        label="correlation"
+        help="correlation method (default: pearson)">
+        <option value="pearson" selected="True">
+        pearson
+        </option>
+        <option value="kendall">
+        kendall
+        </option>
+        <option value="spearman">
+        spearman
+        </option>
+    </param>
+    <param name="pipeline" type="select"
+        label="pipeline"
+        help="name of the alignment pipeline (default: amp)">
+        <option value="amp" selected="True">
+        amp
+        </option>
+        <option value="bismark">
+        bismark
+        </option>
+        <option value="bismarkCoverage">
+        bismarkCoverage
+        </option>
+        <option value="bismarkCytosineReport">
+        bismarkCytosineReport
+        </option>
+    </param>
+    <conditional name="input_type">
+        <param name="choice" type="select"
+            label="analysis to carry out:"
+            help="The analysis you wish to carry out.">
+            <option value="all" selected="True">
+            All provided analysis
+            </option>
+            <option value="differential_methylation">
+            Differential methylation
+            </option>
+            <option value="clustering">
+            Clustering
+            </option>
+            <option value="segmentation">
+            Segmentation
+            </option>
+        </param>
+        <when value="all">
+          <expand macro="differential_methylation" />
+          <expand macro="clustering" />
+        </when>
+        <when value="differential_methylation">
+          <expand macro="differential_methylation" />
+        </when>
+        <when value="clustering">
+          <expand macro="clustering" />
+        </when>
+        <when value="segmentation" />
+    </conditional>
+  </inputs>
+  <outputs>
+      <data name="output_statistics" format="pdf"
+      from_work_dir="output_statistics.pdf"
+      label="${tool.name} on ${on_string}: CpG statistics">
+      </data>
+
+      <data name="output_correlation" format="pdf"
+      from_work_dir="output_correlation.pdf"
+      label="${tool.name} on ${on_string}: correlation between samples">
+      </data>
+
+      <data name="output_myDiff" format="bedgraph"
+      from_work_dir="output_myDiff.bedgraph"
+      label="${tool.name} on ${on_string}: differential methylation">
+      <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
+      </data>
+
+      <data name="output_MethylDiff" format="bedgraph"
+      from_work_dir="output_MethylDiff.bedgraph"
+      label="${tool.name} on ${on_string}: differential methylation - subset">
+      <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
+      </data>
+
+      <data name="output_MethPerChr" format="tabular"
+      from_work_dir="output_MethPerChr.tsv"
+      label="${tool.name} on ${on_string}: number of hyper/hypo sites">
+      <filter>input_type['choice'] in ['all', 'differential_methylation']</filter>
+      </data>
+
+      <data name="output_clustering" format="pdf"
+      from_work_dir="output_clustering.pdf"
+      label="${tool.name} on ${on_string}: hierarchical clustering">
+      <filter>input_type['choice'] in ['all', 'clustering']</filter>
+      </data>
+
+      <data name="output_PCA" format="pdf"
+      from_work_dir="output_PCA.pdf"
+      label="${tool.name} on ${on_string}: PCA">
+      <filter>input_type['choice'] in ['all', 'clustering']</filter>
+      </data>
+
+      <data name="output_seg_raw" format="bed"
+      from_work_dir="output_seg_raw.bed"
+      label="${tool.name} on ${on_string}: methylation segment">
+      <filter>input_type['choice'] in ['all', 'segmentation']</filter>
+      </data>
+
+      <data name="output_seg_diff" format="bed"
+      from_work_dir="output_seg_diff.bed"
+      label="${tool.name} on ${on_string}: differential methylation segment">
+      <filter>input_type['choice'] in ['all', 'segmentation']</filter>
+      </data>
+  </outputs>
+  <tests>
+    <test>
+        <repeat name="test_series">
+            <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" />
+        </repeat>
+        <repeat name="test_series">
+            <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" />
+        </repeat>
+        <repeat name="control_series">
+            <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" />
+        </repeat>
+        <repeat name="control_series">
+            <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" />
+        </repeat>
+        <param name="assembly" value="hg18" />
+        <param name="correlation" value="pearson" />
+        <param name="pipeline" value="amp" />
+        <param name="choice" value="all" />
+        <param name="overdispersion" value="none" />
+        <param name="adjust" value="SLIM" />
+        <param name="effect" value="wmean" />
+        <param name="test" value="Chisq" />
+        <param name="qvalue_cutoff" value="0.01" />
+        <param name="meth_cutoff" value="25" />
+        <param name="type" value="all" />
+        <param name="dist" value="correlation" />
+        <param name="method" value="ward" />
+        <output name="output_statistics" file="output_statistics.pdf"
+        ftype="pdf" compare="sim_size"/>
+        <output name="output_correlation" file="output_correlation.pdf"
+        ftype="pdf" compare="sim_size"/>
+        <output name="output_myDiff" file="output_myDiff.bedgraph"
+        ftype="bedgraph"/>
+        <output name="output_MethPerChr" file="output_MethPerChr.tsv"
+        ftype="tabular"/>
+        <output name="output_MethylDiff" file="output_MethylDiff.bedgraph"
+        ftype="bedgraph"/>
+        <output name="output_clustering" file="output_clustering.pdf"
+        ftype="pdf" compare="sim_size"/>
+        <output name="output_PCA" file="output_PCA.pdf"
+        ftype="pdf" compare="sim_size"/>
+        <output name="output_seg_raw" file="output_seg_raw.bed"
+        ftype="bed"/>
+        <output name="output_seg_diff" file="output_seg_diff.bed"
+        ftype="bed"/>
+    </test>
+  </tests>
+<help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+`methylKit`_ is an R package for DNA methylation analysis and annotation
+from high-throughput bisulfite sequencing.
+The package is designed to deal with sequencing data from RRBS and
+its variants, but also target-capture methods such as Agilent SureSelect
+methyl-seq. In addition, methylKit can deal with base-pair resolution data
+for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome
+bisulfite sequencing data if proper input format is provided.
+
+.. _methylKit: https://github.com/al2na/methylKit
+
+The Galaxy tool enables three types of analysis:
+  * differential methylation
+  * clustering
+  * segmentation
+
+The user can choose to run all provided analysis or run an individual one.
+
+.. class:: infomark
+
+**Input**
+
+Typically, bisulfite converted reads are aligned to the genome and %
+methylation value per base is calculated by processing alignments.
+methylKit takes that  % methylation value per base information as input.
+Such input file may be obtained from `AMP`_ pipeline
+for aligning RRBS reads. A typical input file looks like this::
+
+
+    chrBase       chr   base    strand coverage freqC freqT
+
+    chr21.9764539 chr21 9764539 R      12       25.00 75.00
+
+    chr21.9764513 chr21 9764513 F      12       0.00  100.00
+
+
+.. _AMP: http://code.google.com/p/amp-errbs/
+
+.. class:: infomark
+
+**Output**
+
+The outputs from differential methylation
+  * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics.
+  * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfies the user defined thresholds with qvalue.cutoff and meth.cutoff.
+  * ``number of hyper/hypo sites``: The tabular file contains number of hyper/hypo methylated regions/bases.
+
+.. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html
+
+The outputs from clustering
+  * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data.
+  * ``PCA``: The figure shows principal components analysis of methylation data.
+
+The output from segmentation
+  * ``methylation segment``: The `bed`_ file contains the profile of methylation segment.
+  * ``differential methylation segment``: The bed file contains the profile of differential methylation segment.
+
+.. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1
+]]>
+</help>
+<citations>
+    <citation type="doi">10.1186/gb-2012-13-10-r87</citation>
+</citations>
+</tool>