annotate getgo.r @ 10:f7f3f7db2d4a draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8-dirty
author mvdbeek
date Thu, 31 Mar 2016 12:30:01 -0400
parents 04b9c519d3e1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
# Install a global error handler: suppress R's own error printing, write the
# error message to stderr ourselves, then quit with exit status 1 without
# saving the workspace and without running .Last()/finalizers.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# We need this so that Galaxy does not crash with a UTF-8 error when the
# system uses German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
5
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
6 suppressPackageStartupMessages({
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
7 library("goseq")
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
8 library("optparse")
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
9 library("reshape2")
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
10 })
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
11
5
b79c65c90744 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff changeset
# Divert R's message stream to stdout.
sink(stdout(), type = "message")

# Command-line interface of the script.
option_list <- list(
  make_option(c("-p", "--package"), type = "character", help = "Genome [used for looking up GO categories]"),
  make_option(c("-i", "--gene_id"), type = "character", help = "Gene ID format"),
  make_option(c("-c", "--cats"), type = "character", help = "Comma-seperated list of categories to fetch"),
  make_option(c("-o", "--output"), type = "character", help = "Path to output file")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
args <- parse_args(parser)

# Every option is used further down in the script. Fail right away with an
# explicit message instead of a cryptic downstream error (e.g. strsplit()
# complaining about a non-character argument when --cats is absent).
required_opts <- c("package", "gene_id", "cats", "output")
absent_opts <- required_opts[
  vapply(required_opts, function(opt) is.null(args[[opt]]), logical(1))
]
if (length(absent_opts) > 0) {
  stop(
    "Missing required option(s): ",
    paste0("--", absent_opts, collapse = ", "),
    call. = FALSE
  )
}

# vars

package <- args$package
gene_id <- args$gene_id
output <- args$output
# --cats is a comma-separated list; turn it into a character vector.
cats <- unlist(strsplit(args$cats, ","))
6
0e9424413ab0 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents: 5
diff changeset
30
9
04b9c519d3e1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents: 6
diff changeset
# Build a two-column table mapping gene identifiers to category terms, using
# an annotation package that is loaded by name.
#
# Arguments:
#   package_str  Name of the annotation package to load; should be of the
#                form org.Xx.eg.db.
#   gen          Identifier type for the first output column. "ENTREZ" keeps
#                the keys returned by select(); any other value (should be
#                ENSEMBL, UNIGENE, REFSEQ, SYMBOL or GENENAME) is passed to
#                mapIds() as the target column.
#   cat          Category column to fetch; should be PMID, GO2ALLEGS, ENZYME
#                or PATH. GO2ALLEGS, GO2ALLTAIRS and GO2ALLORFS are all
#                looked up as "GOALL".
#
# Returns:
#   A data frame with two columns named `gen` and "category". Rows containing
#   NA are removed. For categories other than GOALL each term is prefixed
#   with "<cat>:".
get_categories <- function(package_str, gen, cat) {
  library(package_str, character.only = TRUE)
  # Fetch the annotation object by name. get() only performs a lookup, so the
  # string is never parsed and executed as code.
  annotation_db <- get(package_str)

  if (cat %in% c("GO2ALLEGS", "GO2ALLTAIRS", "GO2ALLORFS")) {
    cat <- "GOALL"
  }

  # Two packages use their own key type; everything else is keyed by ENTREZID.
  keytype <- switch(package_str,
    "org.Pf.plasmo.db" = "ORF",
    "org.At.tair.db" = "TAIR",
    "ENTREZID"
  )

  gene_cat <- select(annotation_db, keys(annotation_db), cat, keytype)
  # Drop incomplete rows (judged on all returned columns), then keep only the
  # key and the term. For GOALL this discards ontology (MF, CC, BP) and
  # evidence class.
  gene_cat <- gene_cat[complete.cases(gene_cat), c(1, 2), drop = FALSE]

  if (cat != "GOALL" && nrow(gene_cat) > 0) {
    # Add the origin of the term, so that there are no duplicate values,
    # e.g. between ENZYME and PATH. paste() is vectorised; the row-count guard
    # is needed because paste() would turn an empty column into "<cat>:".
    gene_cat[[2]] <- paste(cat, gene_cat[[2]], sep = ":")
  }
  colnames(gene_cat) <- c(gen, "category")

  # Nothing to translate: either the caller wants the native keys, or there
  # are no rows left (mapIds() is not called with an empty key set).
  if (gen == "ENTREZ" || nrow(gene_cat) == 0) {
    return(gene_cat)
  }

  # We map the native key to `gen`, but are potentially losing gene
  # identifiers where multiple identifiers match a single native gene id,
  # because only the first match is kept.
  gene_cat[[1]] <- unname(mapIds(
    annotation_db,
    keys = as.character(gene_cat[[1]]),
    keytype = keytype,
    column = gen,
    multiVals = "first"
  ))
  gene_cat[complete.cases(gene_cat), , drop = FALSE]
}
5
b79c65c90744 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff changeset
65
9
04b9c519d3e1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents: 6
diff changeset
# Fetch the gene/category table for every requested category and stack the
# per-category tables into one data frame. Each element of `cats` fills the
# remaining `cat` argument of get_categories().
result <- do.call(
  rbind,
  lapply(cats, get_categories, package_str = package, gen = gene_id)
)

# Write the combined table as tab-separated text without header, row names
# or quoting.
write.table(
  result,
  file = output,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
sessionInfo()