Mercurial > repos > mvdbeek > donotinstall
comparison goseq.xml @ 0:8c4a74852e63 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 16ab3a91da29ad4fae86d2c0b1b8b862a3dedcb5-dirty
| author | mvdbeek |
|---|---|
| date | Thu, 01 Jun 2017 11:23:54 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8c4a74852e63 |
|---|---|
| 1 <tool id="goseq" name="goseq" version="0.2.2"> | |
| 2 <description>tests for overrepresented gene categories</description> | |
| 3 <requirements> <!-- Pinned package dependencies of this tool. --> | |
| 4 <requirement version="1.3.2" type="package">r-optparse</requirement> | |
| 5 <requirement version="1.22.0" type="package">bioconductor-goseq</requirement> | |
| 6 </requirements> | |
| 7 <stdio> <!-- Each pattern below is checked against both stdout and stderr and is reported at level "fatal". --> | |
| 8 <regex match="Execution halted" | |
| 9 source="both" | |
| 10 level="fatal" | |
| 11 description="Execution halted." /> | |
| 12 <regex match="Error in" | |
| 13 source="both" | |
| 14 level="fatal" | |
| 15 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
| 16 <regex match="Fatal error" | |
| 17 source="both" | |
| 18 level="fatal" | |
| 19 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
| 20 </stdio> | |
| 21 <command><![CDATA[ | |
| 22 Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file' | |
| 23 --length_file '$length_file' | |
| 24 --category_file '$category_file' | |
| 25 #if $methods['wallenius']: | |
| 26 --wallenius_tab '$wallenius_tab' | |
| 27 #end if | |
| 28 #if $methods['hypergeometric']: | |
| 29 --nobias_tab '$nobias_tab' | |
| 30 #end if | |
| 31 --repcnt '$methods.repcnt' | |
| 32 --sampling_tab '$sampling_tab' | |
| 33 --p_adj_method '$p_adj_method' | |
| 34 --use_genes_without_cat '$use_genes_without_cat' | |
| 35 --make_plots '$make_plots' | |
| 36 --length_bias_plot '$length_bias_plot' | |
| 37 --sample_vs_wallenius_plot '$sample_vs_wallenius_plot' | |
| 38 ]]></command> <!-- Runs goseq.r; the wallenius and hypergeometric output paths are passed only when the matching method is enabled, and each must be the Cheetah output variable rather than a literal name. --> | |
| 39 <inputs> | |
| 40 <param name="dge_file" type="data" format="tabular" label="Differentially expressed gene file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." /> | |
| 41 <param name="length_file" type="data" format="tabular" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" /> | |
| 42 <param name="category_file" type="data" format="tabular" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" /> | |
| 43 <param name="use_genes_without_cat" help="For example, a large number of genes may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested." | |
| 44 label="Count genes without any category?" type="boolean"/> | |
| 45 <section name="methods" title="Method options" expanded="true"> <!-- Groups the method switches and the sampling depth that the command template reads via $methods. --> | |
| 46 <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" /> | |
| 47 <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" /> | |
| 48 <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" /> | |
| 49 </section> | |
| 50 <param type="select" name="p_adj_method" label="Select a method for multiple hypothesis testing correction"> <!-- The selected value is passed to the script as the p_adj_method option. --> | |
| 51 <option selected="true" value="BH">Benjamini-Hochberg [FDR] (1995)</option> | |
| 52 <option value="holm">Holm (1979)</option> | |
| 53 <option value="hommel">Hommel (1988)</option> | |
| 54 <option value="hochberg">Hochberg (1988)</option> | |
| 55 <option value="bonferroni">Bonferroni</option> | |
| 56 <option value="BY">Benjamini - Yekutieli (2001)</option> | |
| 57 </param> | |
| 58 <param name="make_plots" type="boolean" label="Produce diagnostic plots?" help="These plots may help you compare the different p-value estimation methods that goseq can use." /> | |
| 59 </inputs> | |
| 60 <outputs> <!-- A dataset is kept only when every one of its filter expressions is true. --> | |
| 61 <data format="pdf" name="length_bias_plot" label="length bias plot"> | |
| 62 <filter>make_plots</filter> | |
| 63 <filter>methods['hypergeometric']</filter> | |
| 64 </data> | |
| 65 <data format="pdf" name="sample_vs_wallenius_plot" label="Plot P-value from sampling against wallenius distribution"> | |
| 66 <filter>make_plots</filter> | |
| 67 <filter>methods['wallenius']</filter> | |
| 68 <filter>methods['repcnt'] != 0</filter> | |
| 69 </data> | |
| 70 <data format="tabular" name="nobias_tab" label="Ranked category list - no length bias correction"> | |
| 71 <filter>methods['hypergeometric']</filter> | |
| 72 </data> | |
| 73 <data format="tabular" name="sampling_tab" label="Ranked category list - sampling"> | |
| 74 <filter>methods['repcnt'] != 0</filter> | |
| 75 </data> | |
| 76 <data format="tabular" name="wallenius_tab" label="Ranked category list - wallenius method"> | |
| 77 <filter>methods['wallenius']</filter> | |
| 78 </data> | |
| 79 </outputs> | |
| 80 <tests> <!-- Single test case: runs with the default method options and checks the wallenius table. --> | |
| 81 <test> | |
| 82 <param ftype="tabular" name="dge_file" value="dge_list.tab"/> | |
| 83 <param ftype="tabular" name="length_file" value="gene_length.tab"/> | |
| 84 <param ftype="tabular" name="category_file" value="category.tab"/> | |
| 85 <param name="use_genes_without_cat" value="true"/> | |
| 86 <output name="wallenius_tab" compare="re_match" file="wal.tab"/> | |
| 87 </test> | |
| 88 </tests> | |
| 89 <help> | |
| 90 | |
| 91 **What it does** | |
| 92 | |
| 93 Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data. | |
| 94 | |
| 95 Options map closely to the excellent manual_ | |
| 96 | |
| 97 | |
| 98 **Input files** | |
| 99 | |
| 100 *DGE list:* | |
| 101 goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column. | |
| 102 TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed. | |
| 103 You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset. | |
| 104 | |
| 105 *Gene length file:* | |
| 106 goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes | |
| 107 using a probability weighting function (PWF). | |
| 108 The format of this file is tabular, with gene_id in the first column and length in the second column. | |
| 109 The "get length and gc content" tool can produce such a file. | |
| 110 | |
| 111 *Gene category file:* | |
| 112 You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column, | |
| 113 category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve | |
| 114 gene ontologies for model organisms, or you can construct your own file. | |
| 115 | |
| 116 **Method options** | |
| 117 | |
| 118 3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows. | |
| 119 | |
| 120 *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution. | |
| 121 This distribution assumes that within a category all genes have the same probability of being chosen. | |
| 122 Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small. | |
| 123 | |
| 124 *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories. | |
| 125 Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive. | |
| 126 | |
| 127 *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution. | |
| 128 Useful if you wish to test the effect of selection bias on your results. | |
| 129 | |
| 130 CAUTION: "Hypergeometric" should NEVER be used for producing results for biological interpretation. | |
| 131 If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accurate results. | |
| 132 | |
| 133 .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf | |
| 134 | |
| 135 | |
| 136 </help> | |
| 137 <citations> | |
| 138 <citation type="doi">10.1186/gb-2010-11-2-r14</citation> | |
| 139 </citations> | |
| 140 </tool> |
