r_goseq_1_22_0: goseq.r comparison

comparison goseq.r @ 5:b79c65c90744 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5

author	mvdbeek
date	Sun, 28 Feb 2016 11:52:10 -0500
parents	76eab486aba9
children	0e9424413ab0

comparison

equal deleted inserted replaced

-:76eab486aba9
+:b79c65c90744
 make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff",
 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"),
 make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
 make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"),
 make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"),
-make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file")
+make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file"),
+make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical", help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated.  If thisoption is set to FALSE, those genes will be ignored in the calculation of p-values(default behaviour).  If this option is set to TRUE, then these genes will count towards  the  total  number  of  genes  outside  the  category  being  tested  (default behaviour prior to version 1.15.2)."
+)
 )
 # Build the command-line parser from option_list and parse the arguments.
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
 args = parse_args(parser)
 # Vars:
 # Copy the parsed options into plain variables used by the rest of the script.
 sampling_tab = args$sampling_tab
 nobias_tab = args$nobias_tab
 length_bias_plot = args$length_bias_plot
 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot
 repcnt = args$repcnt
 # Forwarded unchanged to every goseq() call further down.
+use_genes_without_cat = args$use_genes_without_cat
 # format DE genes into vector suitable for use with goseq
 dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE)
 # 1 where the value in p_adj_column is below p_adj_cutoff, 0 otherwise
 # (an NA in that column stays NA after the comparison).
 # NOTE(review): the --cutoff help text describes genes below the cutoff as
 # controls, but here they are the ones coded 1 - confirm which is intended.
 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff)
 names(genes) = dge_table[,1] # Assuming first column contains gene names
 # Estimate PWF
 # nullp() runs while the PDF device is open, so any plot it draws is written
 # to length_bias_plot.
 pdf(length_bias_plot)
 pwf=nullp(genes, genome, gene_id, gene_lengths)
 # The return value of dev.off() is assigned so it is not auto-printed.
 message = dev.off()
+# Fetch GO annotations:
 # The categories requested here also include KEGG. The resulting map is
 # fetched once and handed to each goseq() call below through gene2cat.
+go_map=getgo(names(genes), genome, gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG"))
 # wallenius approximation of p-values
-GO.wall=goseq(pwf, genome, gene_id)
+GO.wall=goseq(pwf, genome, gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
 # Same inputs with method="Hypergeometric"; the result is kept as GO.nobias.
-GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric")
+GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
 # Sampling distribution
 if (repcnt > 0) {
-GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt)
+GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
 # Compare sampling with wallenius
 pdf(sample_vs_wallenius_plot)
 plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]),
 xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)",
 xlim=c(-3,0))

Mercurial > repos > mvdbeek > r_goseq_1_22_0

comparison goseq.r @ 5:b79c65c90744 draft