Mercurial > repos > mvdbeek > r_goseq_1_22_0
annotate getgo.r @ 10:f7f3f7db2d4a draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8-dirty
author | mvdbeek |
---|---|
date | Thu, 31 Mar 2016 12:30:01 -0400 |
parents | 04b9c519d3e1 |
children |
rev | line source |
---|---|
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
2 |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
3 # We need this so that Galaxy does not crash with a UTF-8 error under German locale (LC) settings. |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
5 |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
6 suppressPackageStartupMessages({ |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
7 library("goseq") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
8 library("optparse") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
9 library("reshape2") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
10 }) |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
11 |
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
12 sink(stdout(), type = "message") |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
13 |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
14 option_list <- list( |
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
15 make_option(c("-p", "--package"), type="character", help = "Genome [used for looking up GO categories]"), |
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
16 make_option(c("-i", "--gene_id"), type="character", help="Gene ID format"), |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
17 make_option(c("-c", "--cats"), type="character", help="Comma-seperated list of categories to fetch"), |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
18 make_option(c("-o", "--output"), type="character", help="Path to output file") |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
19 ) |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
20 |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
21 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
22 args = parse_args(parser) |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
23 |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
24 # vars |
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
25 |
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
26 package = args$package |
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
27 gene_id = args$gene_id |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
28 output = args$output |
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
29 cats = unlist(strsplit(args$cats, ',')) |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
30 |
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
31 get_categories = function(package_str, gen, cat) { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
32 # gen should be ENSEMBL, UNIGENE, REFSEQ, SYMBOL or GENENAME |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
33 # package should be org.Xx.eg.db |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
34 # cat should be PMID, ENZYME, PATH, or one of GO2ALLEGS / GO2ALLTAIRS / GO2ALLORFS (these three are all queried as GOALL) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
35 library(package_str, character.only = TRUE) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
36 package = eval( parse( text=package_str ) ) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
37 if( cat %in% c("GO2ALLEGS", "GO2ALLTAIRS", "GO2ALLORFS") ) { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
38 cat = "GOALL" |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
39 } |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
40 if(package_str == "org.Pf.plasmo.db") { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
41 keytype = "ORF" |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
42 } else if(package_str == "org.At.tair.db") { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
43 keytype = "TAIR" |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
44 } else { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
45 keytype = "ENTREZID" |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
46 } |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
47 entrez_cat = select(package, keys(package), cat, keytype) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
48 entrez_cat = entrez_cat[complete.cases(entrez_cat),] |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
49 if( cat != "GOALL" ) { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
50 # add the origin of the term, so that there are no duplicate values, e.g. between ENZYME and PATH |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
51 entrez_cat[,2] = sapply(entrez_cat[,2], function(x) paste(cat, x, sep=":")) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
52 } else { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
53 entrez_cat = entrez_cat[,c(1,2)] # we are discarding ontology (MF, CC, BP) and evidence class here |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
54 } |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
55 colnames(entrez_cat) = c(gen, "category") |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
56 if( gen == "ENTREZ" ) { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
57 return( entrez_cat ) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
58 } else { |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
59 # We map ENTREZ to `gen`, but are potentially losing gene identifiers where multiple identifiers match a single ENTREZ gene id. |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
60 entrez_cat[,1] = mapIds(package, keys=as.character(entrez_cat[,1]), keytype=keytype, column=gen, multiVals="first") |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
61 entrez_cat = entrez_cat[complete.cases(entrez_cat),] |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
62 return(entrez_cat) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
63 } |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
64 } |
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
65 |
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
66 result = lapply( cats, function(x) get_categories(package, gene_id, x ) ) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
67 result = do.call(rbind, result) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
68 |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
69 write.table(result, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE) |
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
70 sessionInfo() |