# HG changeset patch # User mvdbeek # Date 1456678330 18000 # Node ID b79c65c90744cfabfb2d6f879056b5dd8a5e29f3 # Parent 76eab486aba9b3e1e90c5a41e71f87a78b57c460 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5 diff -r 76eab486aba9 -r b79c65c90744 getgo.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getgo.r Sun Feb 28 11:52:10 2016 -0500 @@ -0,0 +1,35 @@ +suppressWarnings(suppressMessages(library(goseq))) +suppressWarnings(suppressMessages(library(optparse))) +suppressWarnings(suppressMessages(library(rtracklayer))) +suppressWarnings(suppressMessages(library(reshape2))) +sink(stdout(), type = "message") + +option_list <- list( + make_option(c("-gtf", "--gtf"), type="character", help = "Path to GTF file for which to fetch GO data"), + make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up GO categories]"), + make_option(c("-i", "--gene_id"), type="character", help="Gene ID format"), + make_option(c("-c", "--cats"), type="character", help="Comma-seperated list of categories to fetch"), + make_option(c("-o", "--output"), type="character", help="Path to output file") +) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +args = parse_args(parser) + +# Vars: + +gtf = args$gtf +genome = args$genome +gene_id = args$gene_id +output = args$output +cats = unlist(strsplit(args$cats, ',')) +genes = unique(import.gff(gtf)$gene_id) +go_categories = getgo(genes, genome, id, fetch.cats=cats) + +# transform go category list to sth. more manipulatable in galaxy +go_categories <- lapply(go_categories, unlist) +go_categories = goseq:::reversemapping(go_categories) +go_categories = melt(go_categories) +colnames(go_categories) = c("#gene_id", "go_category") + +write.table(go_categories, output, sep="\t", row.names = FALSE, quote = FALSE) +sessionInfo() \ No newline at end of file diff -r 76eab486aba9 -r b79c65c90744 getgo.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getgo.xml Sun Feb 28 11:52:10 2016 -0500 @@ -0,0 +1,43 @@ + + + + R + goseq + + + getgo.r --genome "$genome" + --gtf "$gtf" + --gene_id "$gene_id" + --output "$output" + --cats "$cats" + + + + + + + + + + + + + + + + + + + + + + + **What it does** + + Returns a tabular file with GO gene categories. + + + + + + diff -r 76eab486aba9 -r b79c65c90744 goseq.r --- a/goseq.r Fri Feb 26 13:31:00 2016 -0500 +++ b/goseq.r Sun Feb 28 11:52:10 2016 -0500 @@ -15,7 +15,9 @@ make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"), make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"), make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"), - make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file") + make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file"), + make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical", help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated. If thisoption is set to FALSE, those genes will be ignored in the calculation of p-values(default behaviour). If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested (default behaviour prior to version 1.15.2)." +) ) parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) args = parse_args(parser) @@ -33,6 +35,7 @@ length_bias_plot = args$length_bias_plot sample_vs_wallenius_plot = args$sample_vs_wallenius_plot repcnt = args$repcnt +use_genes_without_cat = args$use_genes_without_cat # format DE genes into vector suitable for use with goseq dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE) @@ -53,14 +56,18 @@ pdf(length_bias_plot) pwf=nullp(genes, genome, gene_id, gene_lengths) message = dev.off() + +# Fetch GO annotations: +go_map=getgo(names(genes), genome, gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG")) + # wallenius approximation of p-values -GO.wall=goseq(pwf, genome, gene_id) +GO.wall=goseq(pwf, genome, gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) -GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric") +GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) # Sampling distribution if (repcnt > 0) { - GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt) + GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) # Compare sampling with wallenius pdf(sample_vs_wallenius_plot) plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]), diff -r 76eab486aba9 -r b79c65c90744 goseq.xml --- a/goseq.xml Fri Feb 26 13:31:00 2016 -0500 +++ b/goseq.xml Sun Feb 28 11:52:10 2016 -0500 @@ -19,10 +19,13 @@ --length_bias_plot "$length_bias_plot" --sample_vs_wallenius_plot "$sample_vs_wallenius_plot" --repcnt "$repcnt" + --use_genes_without_cat "$use_genes_without_cat" + diff -r 76eab486aba9 -r b79c65c90744 tool_dependencies.xml --- a/tool_dependencies.xml Fri Feb 26 13:31:00 2016 -0500 +++ b/tool_dependencies.xml Sun Feb 28 11:52:10 2016 -0500 @@ -4,6 +4,6 @@ - +