donotinstall: goseq.xml comparison

comparison goseq.xml @ 0:8c4a74852e63 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 16ab3a91da29ad4fae86d2c0b1b8b862a3dedcb5-dirty

author	mvdbeek
date	Thu, 01 Jun 2017 11:23:54 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:8c4a74852e63
+<tool id="goseq" name="goseq" version="0.2.2">
+<!-- Short description of what the tool does. -->
+<description>tests for overrepresented gene categories</description>
+<!-- Package requirements: r-optparse 1.3.2 and bioconductor-goseq 1.22.0. -->
+<requirements>
+    <requirement version="1.3.2" type="package">r-optparse</requirement>
+    <requirement version="1.22.0" type="package">bioconductor-goseq</requirement>
+</requirements>
+<!-- Error detection rules: each pattern is matched with source="both" and, when found, reported at level "fatal". -->
+<stdio>
+    <!-- R prints this line when a script aborts. -->
+    <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+    <!-- Generic error prefixes; both share the same user-facing message. -->
+    <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    <regex match="Fatal error"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+</stdio>
+<!-- Command template for goseq.r. The wallenius and hypergeometric output options are only passed when the matching method is enabled; every value is a template variable so results land in the declared output datasets. -->
+<command><![CDATA[
+Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file'
+--length_file '$length_file'
+--category_file '$category_file'
+#if $methods['wallenius']:
+--wallenius_tab '$wallenius_tab'
+#end if
+#if $methods['hypergeometric']:
+--nobias_tab '$nobias_tab'
+#end if
+--repcnt '$methods.repcnt'
+--sampling_tab '$sampling_tab'
+--p_adj_method '$p_adj_method'
+--use_genes_without_cat '$use_genes_without_cat'
+--make_plots '$make_plots'
+--length_bias_plot '$length_bias_plot'
+--sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+]]></command>
+<!-- User-facing parameters: three tabular input files, a category-counting switch, the method options section, the p-value adjustment method and the plot switch. -->
+<inputs>
+    <param name="dge_file" type="data" format="tabular"
+           label="Differentially expressed gene file"
+           help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." />
+    <param name="length_file" type="data" format="tabular"
+           label="Gene length file for length bias correction"
+           help="You can calculate the gene length using the get length and gc content tool" />
+    <param name="category_file" type="data" format="tabular"
+           label="Gene category file"
+           help="You can obtain a mapping of gene id to gene ontology using the getgo tool" />
+    <param name="use_genes_without_cat" type="boolean"
+           label="Count genes without any category?"
+           help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested" />
+    <!-- Method selection; the command template reads these values as $methods[...] / $methods.repcnt. -->
+    <section name="methods" title="Method options" expanded="True">
+        <param name="wallenius" type="boolean" checked="true"
+               label="Use wallenius method"
+               help="See help for details" />
+        <param name="hypergeometric" type="boolean" checked="false"
+               label="Use hypergeometric method"
+               help="Does not use gene length information. See help for details" />
+        <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0"
+               label="sampling depth"
+               help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" />
+    </section>
+    <param name="p_adj_method" type="select"
+           label="Select a method for multiple hypothesis testing correction">
+        <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option>
+        <option value="holm">Holm (1979)</option>
+        <option value="hommel">Hommel (1988)</option>
+        <option value="hochberg">Hochberg (1988)</option>
+        <option value="bonferroni">Bonferroni</option>
+        <option value="BY">Benjamini - Yekutieli (2001)</option>
+    </param>
+    <param name="make_plots" type="boolean"
+           label="Produce diagnostic plots?"
+           help="These plots may help you compare the different p-value estimation methods that goseq can use." />
+</inputs>
+<!-- Output datasets: two optional PDF plots and one tabular result per method. Each dataset lists the filter expressions that gate it. -->
+<outputs>
+    <!-- Diagnostic plots -->
+    <data name="length_bias_plot" label="length bias plot" format="pdf">
+        <filter>make_plots</filter>
+        <filter>methods['hypergeometric']</filter>
+    </data>
+    <data name="sample_vs_wallenius_plot" label="Plot P-value from sampling against wallenius distribution" format="pdf">
+        <filter>methods['repcnt'] != 0</filter>
+        <filter>methods['wallenius']</filter>
+        <filter>make_plots</filter>
+    </data>
+    <!-- Ranked category tables, one per method -->
+    <data name="nobias_tab" label="Ranked category list - no length bias correction" format="tabular">
+        <filter>methods['hypergeometric']</filter>
+    </data>
+    <data name="sampling_tab" label="Ranked category list - sampling" format="tabular">
+        <filter>methods['repcnt'] != 0</filter>
+    </data>
+    <data name="wallenius_tab" label="Ranked category list - wallenius method" format="tabular">
+        <filter>methods['wallenius']</filter>
+    </data>
+</outputs>
+<!-- Single test case: three tabular inputs, use_genes_without_cat enabled, wallenius_tab compared to wal.tab with re_match. -->
+<tests>
+    <test>
+        <param name="dge_file" ftype="tabular" value="dge_list.tab"/>
+        <param name="length_file" ftype="tabular" value="gene_length.tab"/>
+        <param name="category_file" ftype="tabular" value="category.tab"/>
+        <param name="use_genes_without_cat" value="true"/>
+        <output name="wallenius_tab" compare="re_match" file="wal.tab"/>
+    </test>
+</tests>
+<help>
+**What it does**
+Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
+Options map closely to the excellent manual_
+**Input files**
+*DGE list:*
+goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column.
+TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed.
+You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset.
+*Gene length file:*
+goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes
+using a probability weighting function (PWF).
+The format of this file is tabular, with gene_id in the first column and length in the second column.
+The "get length and gc content" tool can produce such a file.
+*Gene category file:*
+You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
+category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve
+gene ontologies for model organisms, or you can construct your own file.
+**Method options**
+3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows.
+*"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.
+This distribution assumes that within a category all genes have the same probability of being chosen.
+Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small.
+*"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.
+Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive.
+*"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution.
+Useful if you wish to test the effect of selection bias on your results.
+CAUTION:  "Hypergeometric" should NEVER be used for producing results for biological interpretation.
+If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accurate results.
+.. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
+</help>
+<!-- Publication to cite for this tool, referenced by DOI. -->
+<citations>
+    <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
+</citations>
+</tool>

Mercurial > repos > mvdbeek > donotinstall

comparison goseq.xml @ 0:8c4a74852e63 draft default tip