comparison goseq.xml @ 6:0e9424413ab0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
author mvdbeek
date Thu, 03 Mar 2016 09:56:51 -0500
parents b79c65c90744
children 9ffae7bc23c2
comparison
equal deleted inserted replaced
5:b79c65c90744 6:0e9424413ab0
1 <tool id="goseq" name="goseq gene ontology analyser" version="0.1.5"> 1 <tool id="goseq" name="goseq" version="0.2.0">
2 <description /> 2 <description>tests for overrepresented gene categories</description>
3 <requirements> 3 <macros>
4 <requirement type="package" version="3.2.1">R</requirement> 4 <import>go_macros.xml</import>
5 <requirement type="package" version="1.22.0">goseq</requirement> 5 </macros>
6 </requirements> 6 <expand macro="requirements" />
7 <expand macro="stdio" />
7 <command interpreter="Rscript"> 8 <command interpreter="Rscript">
8 goseq.r --dge_file "$dge_file" 9 goseq.r --dge_file "$dge_file"
9 --p_adj_column "$p_adj_column" 10 --p_adj_column "$p_adj_column"
10 --cutoff "$p_adj_cutoff" 11 --cutoff "$p_adj_cutoff"
11 #if $source.use_length_file == "yes":
12 --length_file "$length_file" 12 --length_file "$length_file"
13 #end if 13 --category_file "$category_file"
14 --genome "$genome"
15 --gene_id "$gene_id"
16 --wallenius_tab "$wallenius_tab" 14 --wallenius_tab "$wallenius_tab"
17 --sampling_tab "$sampling_tab" 15 --sampling_tab "$sampling_tab"
18 --nobias_tab "$nobias_tab" 16 --nobias_tab "$nobias_tab"
19 --length_bias_plot "$length_bias_plot" 17 --length_bias_plot "$length_bias_plot"
20 --sample_vs_wallenius_plot "$sample_vs_wallenius_plot" 18 --sample_vs_wallenius_plot "$sample_vs_wallenius_plot"
21 --repcnt "$repcnt" 19 --repcnt "$repcnt"
22 --use_genes_without_cat "$use_genes_without_cat" 20 --use_genes_without_cat "$use_genes_without_cat"
21 --make_plots "$make_plots"
23 </command> 22 </command>
24 <inputs> 23 <inputs>
25 <param help="deseq2/edger/limma differential gene expression list" label="DGE list" name="dge_file" type="data" format="tabular" /> 24 <param help="deseq2/edger/limma differential gene expression list" label="DGE list" name="dge_file" type="data" format="tabular" />
26 <param help="Select the column that contains the multiple-testing corrected p-value" label="p adjust column" name="p_adj_column" type="data_column" numeric="true" data_ref="dge_file"/> 25 <param help="Select the column that contains the multiple-testing corrected p-value" label="p adjust column" name="p_adj_column" type="data_column" numeric="true" data_ref="dge_file"/>
27 <param help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested" 26 <param label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" name="length_file" type="data" format="tabular" required="true" />
28 name="use_genes_without_cat" label="Count genes without any category" type="boolean"/> 27 <param label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" name="category_file" type="data" format="tabular" required="true" />
28 <param help="For example, a large number of genes may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"
29 name="use_genes_without_cat" label="Count genes without any category?" type="boolean"/>
29 <param help="Typically 0.05 after multiple testing correction" max="1" label="Minimum p adjust value to consider genes as differentially expressed" name="p_adj_cutoff" type="float" value="0.05" /> 30 <param help="Typically 0.05 after multiple testing correction" max="1" label="Minimum p adjust value to consider genes as differentially expressed" name="p_adj_cutoff" type="float" value="0.05" />
30 <conditional name="source"> 31 <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param>
31 <param help="This is needed if the gene length is not available in goseq. e.g. hg38 and mm10." label="Use gene length file?" name="use_length_file" type="select">
32 <option value="no">no</option>
33 <option value="yes">yes</option>
34 </param>
35 <when value="yes">
36 <param label="Gene length file" name="length_file" type="data" format="tabular" required="false" />
37 </when>
38 <when value="no">
39 </when>
40 </conditional>
41 <param help="Needed to retrieve gene length for length correction" label="Select the genome source" name="genome" size="3" type="select">
42 <options from_data_table="go_genomes"></options>
43 </param>
44 <param help="Needed for GO analysis" label="Select gene identifier" name="gene_id" type="select">
45 <options from_data_table="go_gene_ids"></options>
46 </param>
47 <param help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" name="repcnt" size="3" type="integer" min="0" max="10000" value="0" /> 32 <param help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" name="repcnt" size="3" type="integer" min="0" max="10000" value="0" />
48 </inputs> 33 </inputs>
49 <outputs> 34 <outputs>
50 <data format="pdf" label="length bias plot" name="length_bias_plot" /> 35 <data format="pdf" label="length bias plot" name="length_bias_plot">
36 <filter>make_plots</filter>
37 </data>
51 <data format="pdf" label="Plot P-value from sampling against wallenius distribution" name="sample_vs_wallenius_plot"> 38 <data format="pdf" label="Plot P-value from sampling against wallenius distribution" name="sample_vs_wallenius_plot">
52 <filter>repcnt != 0</filter> 39 <filter>repcnt != 0</filter>
40 <filter>make_plots</filter>
53 </data> 41 </data>
54 <data format="tabular" label="Ranked category list - no length bias correction" name="nobias_tab" /> 42 <data format="tabular" label="Ranked category list - no length bias correction" name="nobias_tab" />
55 <data format="tabular" label="Ranked category list - sampling" name="sampling_tab"> 43 <data format="tabular" label="Ranked category list - sampling" name="sampling_tab">
56 <filter>repcnt != 0</filter> 44 <filter>repcnt != 0</filter>
57 </data> 45 </data>
58 <data format="tabular" label="Ranked category list - wallenius approx. of p-values" name="wallenius_tab" /> 46 <data format="tabular" label="Ranked category list - wallenius approx. of p-values" name="wallenius_tab" />
59 </outputs> 47 </outputs>
60 <tests> 48 <tests>
61 <test> 49 <test>
62 <param name="dge_file" value="dge_list.tab" ftype="tabular"/> 50 <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
63 <param name="use_length_file" value="no" /> 51 <param name="length_file" value="gene_length.tab" ftype="tabular"/>
52 <param name="category_file" value="category.tab" ftype="tabular"/>
53 <param name="use_genes_without_cat" value="true" />
64 <param name="p_adj_column" value="2" /> 54 <param name="p_adj_column" value="2" />
65 <param name="genome" value="hg19" /> 55 <output name="wallenius_tab" file="wal.tab" compare="contains"/>/>
66 <param name="gene_id" value="ensGene" />
67 <output name="wallenius_tab" file="wall.tab" compare="contains"/>/>
68 </test> 56 </test>
69 </tests> 57 </tests>
70 <help> 58 <help>
71 59
72 **What it does** 60 **What it does**
73 61
74 Detects Gene Ontology and/or other user defined categories which are over/under represented in RNA-seq data. 62 Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
75 63
76 Options map closely to the excellent manual_ 64 Options map closely to the excellent manual_
65
66
67 **Input files**
68
69 goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes.
70 The format of this file is tabular, with gene_id in the first column and length in the second column.
71 The "get length and gc content" tool can produce such a file.
72
73 You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
75 category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve
75 gene ontologies for model organisms, or you can construct your own file.
77 76
78 .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf 77 .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
79 78
80 79
81 </help> 80 </help>