Mercurial > repos > mvdbeek > r_goseq_1_22_0
comparison goseq.r @ 5:b79c65c90744 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
author | mvdbeek |
---|---|
date | Sun, 28 Feb 2016 11:52:10 -0500 |
parents | 76eab486aba9 |
children | 0e9424413ab0 |
comparison
equal
deleted
inserted
replaced
4:76eab486aba9 | 5:b79c65c90744 |
---|---|
13 make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff", | 13 make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff", |
14 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"), | 14 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"), |
15 make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"), | 15 make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"), |
16 make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"), | 16 make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"), |
17 make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"), | 17 make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"), |
18 make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file") | 18 make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file"), |
19 make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical", help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated. If thisoption is set to FALSE, those genes will be ignored in the calculation of p-values(default behaviour). If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested (default behaviour prior to version 1.15.2)." | |
20 ) | |
19 ) | 21 ) |
20 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 22 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
21 args = parse_args(parser) | 23 args = parse_args(parser) |
22 | 24 |
23 # Vars: | 25 # Vars: |
31 sampling_tab = args$sampling_tab | 33 sampling_tab = args$sampling_tab |
32 nobias_tab = args$nobias_tab | 34 nobias_tab = args$nobias_tab |
33 length_bias_plot = args$length_bias_plot | 35 length_bias_plot = args$length_bias_plot |
34 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot | 36 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot |
35 repcnt = args$repcnt | 37 repcnt = args$repcnt |
38 use_genes_without_cat = args$use_genes_without_cat | |
36 | 39 |
37 # format DE genes into vector suitable for use with goseq | 40 # format DE genes into vector suitable for use with goseq |
38 dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE) | 41 dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE) |
39 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff) | 42 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff) |
40 names(genes) = dge_table[,1] # Assuming first row contains gene names | 43 names(genes) = dge_table[,1] # Assuming first row contains gene names |
51 # Estimate PWF | 54 # Estimate PWF |
52 | 55 |
53 pdf(length_bias_plot) | 56 pdf(length_bias_plot) |
54 pwf=nullp(genes, genome, gene_id, gene_lengths) | 57 pwf=nullp(genes, genome, gene_id, gene_lengths) |
55 message = dev.off() | 58 message = dev.off() |
59 | |
60 # Fetch GO annotations: | |
61 go_map=getgo(names(genes), genome, gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG")) | |
62 | |
56 # wallenius approximation of p-values | 63 # wallenius approximation of p-values |
57 GO.wall=goseq(pwf, genome, gene_id) | 64 GO.wall=goseq(pwf, genome, gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) |
58 | 65 |
59 GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric") | 66 GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) |
60 | 67 |
61 # Sampling distribution | 68 # Sampling distribution |
62 if (repcnt > 0) { | 69 if (repcnt > 0) { |
63 GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt) | 70 GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) |
64 # Compare sampling with wallenius | 71 # Compare sampling with wallenius |
65 pdf(sample_vs_wallenius_plot) | 72 pdf(sample_vs_wallenius_plot) |
66 plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]), | 73 plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]), |
67 xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)", | 74 xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)", |
68 xlim=c(-3,0)) | 75 xlim=c(-3,0)) |