Mercurial > repos > mvdbeek > r_goseq_1_22_0
changeset 9:04b9c519d3e1 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
author | mvdbeek |
---|---|
date | Thu, 31 Mar 2016 12:23:45 -0400 |
parents | fb95db039592 |
children | f7f3f7db2d4a |
files | getgo.r getgo.xml go_macros.xml goseq.xml test-data/available_categories.loc test-data/available_identifiers.loc test-data/gene_ids.loc test-data/genomes.loc test-data/org_name.loc test-data/tool_data_table_conf.xml test-data/wal.tab tool-data/NCBI.sqlite tool-data/available_categories.loc.sample tool-data/available_identifiers.loc.sample tool-data/gene2pubmed.gz tool-data/gene_ids.loc.sample tool-data/genomes.loc.sample tool-data/listing_available_identifiers_and_data_sources.r tool-data/org_name.loc.sample tool-data/org_packages.tab tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample.test tool_dependencies.xml |
diffstat | 23 files changed, 647 insertions(+), 492 deletions(-) [+] |
line wrap: on
line diff
--- a/getgo.r Mon Mar 07 14:35:53 2016 -0500 +++ b/getgo.r Thu Mar 31 12:23:45 2016 -0400 @@ -6,15 +6,13 @@ suppressPackageStartupMessages({ library("goseq") library("optparse") - library("rtracklayer") library("reshape2") }) sink(stdout(), type = "message") option_list <- list( - make_option(c("-gtf", "--gtf"), type="character", help = "Path to GTF file for which to fetch GO data"), - make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up GO categories]"), + make_option(c("-p", "--package"), type="character", help = "Genome [used for looking up GO categories]"), make_option(c("-i", "--gene_id"), type="character", help="Gene ID format"), make_option(c("-c", "--cats"), type="character", help="Comma-seperated list of categories to fetch"), make_option(c("-o", "--output"), type="character", help="Path to output file") @@ -25,17 +23,48 @@ # vars -gtf = args$gtf -genome = args$genome +package = args$package gene_id = args$gene_id output = args$output cats = unlist(strsplit(args$cats, ',')) -# retrieve and transform data -genes = unique(import.gff(gtf)$gene_id) -go_categories = getgo(genes, genome, gene_id, fetch.cats=cats) -go_categories = goseq:::reversemapping(go_categories) -go_categories = melt(go_categories) +get_categories = function(package_str, gen, cat) { + # gen should be ENSEMBL, UNIGENE, REFSEQ, SYMBOL or GENENAME + # package should be org.Xx.eg.db + # cat should be PMID, GO2ALLEGS, ENZYME or PATH + library(package_str, character.only = TRUE) + package = eval( parse( text=package_str ) ) + if( cat %in% c("GO2ALLEGS", "GO2ALLTAIRS", "GO2ALLORFS") ) { + cat = "GOALL" + } + if(package_str == "org.Pf.plasmo.db") { + keytype = "ORF" + } else if(package_str == "org.At.tair.db") { + keytype = "TAIR" + } else { + keytype = "ENTREZID" + } + entrez_cat = select(package, keys(package), cat, keytype) + entrez_cat = entrez_cat[complete.cases(entrez_cat),] + if( cat != "GOALL" ) { + # add the origin of the term, so that there are no 
duplicate values e.g. between ENZYME and PATH + entrez_cat[,2] = sapply(entrez_cat[,2], function(x) paste(cat, x, sep=":")) + } else { + entrez_cat = entrez_cat[,c(1,2)] # we are discarding ontology (MF, CC, BP) and evidence class here + } + colnames(entrez_cat) = c(gen, "category") + if( gen == "ENTREZ" ) { + return( entrez_cat ) + } else { + # We map ENTREZ to `gen`, but are potentially losing gene identifiers where multiple identifiers match a single ENTREZ gene id. + entrez_cat[,1] = mapIds(package, keys=as.character(entrez_cat[,1]), keytype=keytype, column=gen, multiVals="first") + entrez_cat = entrez_cat[complete.cases(entrez_cat),] + return(entrez_cat) + } +} -write.table(go_categories, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE) -sessionInfo() \ No newline at end of file +result = lapply( cats, function(x) get_categories(package, gene_id, x ) ) +result = do.call(rbind, result) + +write.table(result, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE) +sessionInfo()
--- a/getgo.xml Mon Mar 07 14:35:53 2016 -0500 +++ b/getgo.xml Thu Mar 31 12:23:45 2016 -0400 @@ -1,53 +1,58 @@ -<tool id="getgo" name="getgo" version="0.1.0"> - <description>downloads gene ontologies for model organisms</description> +<tool id="getgo" name="get_gene_categories" version="0.1.0"> + <description>retrieve gene categories for model organisms</description> <macros> <import>go_macros.xml</import> </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <command interpreter="Rscript"> - getgo.r --genome "$genome" - --gtf "$gtf" + <command><![CDATA[ + Rscript $__tool_directory__/getgo.r + --package "$package" --gene_id "$gene_id" --output "$output" --cats "$cats" + ]]> </command> <inputs> - <param name="gtf" label="select GTF file" help="GO annotations for all gene ids in this GTF will be fetched" type="data" format="gtf"/> - <param help="Needed to retrieve GO annotations for the selected genome" label="Select the genome source" name="genome" size="3" type="select"> - <options from_data_table="go_genomes"></options> + <param help="These are bioconductor genome annotation packages." label="Select the genome package" name="package" size="3" type="select"> + <options from_data_table="org_names"></options> </param> - <param help="Needed for GO analysis" label="Select gene identifier format" name="gene_id" type="select"> - <options from_data_table="go_gene_ids"></options> + <param help="This option determines which gene identifier format is used for mapping genes to categories in the output file. If you have a list of differentially expressed genes, choose the same format." 
label="Select gene identifier format" name="gene_id" type="select"> + <options from_data_table="available_identifiers"> + <filter type="param_value" ref="package" column="2"/> + </options> </param> - <param name="cats" help="Select the categories for which you would like to retrieve ontologies" type="select" multiple="true" display="checkboxes"> - <option value="GO:CC">GO:Cellular Components</option> - <option value="GO:BP">BiologicalProcesses</option> - <option value="GO:MF">Molecular Function</option> - <option value="KEGG">KEGG pathway</option> + <param name="cats" label="Select categories" help="Selecting a category will return the category and a list of all genes in that category" type="select" multiple="true" display="checkboxes"> + <options from_data_table="available_categories"> + <filter type="param_value" ref="package" column="2"/> + </options> </param> </inputs> <outputs> - <data format="tabular" label="GO category mapping" name="output" /> + <data format="tabular" label="gene category mapping" name="output" /> </outputs> <tests> <test> - <param name="gtf" value="in.gtf" ftype="gtf"></param> - <param name="genome" value="hg38"></param> - <param name="gene_id" value="ensGene"></param> - <param name="cats" value="GO:CC,GO:BP,GO:MF"></param> - <output name="output" file="go_terms.tab"></output> + <param name="package" value="org.Hs.eg.db"></param> + <param name="gene_id" value="ENSEMBL"></param> + <param name="cats" value="GO2ALLEGS"></param> + <output name="output" file="go_terms.tab" compare="contains"></output> </test> </tests> <help> **What it does** - Returns a tabular file with GO gene categories for all genes present in the input GTF file. - + This tool uses Bioconductor species annotation packages [org.Xx.xx.db] to extract gene category information. + To do gene {category/set} enrichment analysis, use the output of this tool with the goseq tool.
</help> <citations> - <citation type="doi">10.1186/gb-2010-11-2-r14</citation> + <citation type="bibtex">@ARTICLE{AnnotationDbi, + title = {AnnotationDbi: Annotation Database Interface}, + author = {Herve Pages and Marc Carlson and Seth Falcon and Nianhua Li}, + note = {R package version 1.32.3} + } + </citation> </citations> </tool>
--- a/go_macros.xml Mon Mar 07 14:35:53 2016 -0500 +++ b/go_macros.xml Thu Mar 31 12:23:45 2016 -0400 @@ -3,6 +3,7 @@ <requirements> <requirement type="package" version="3.2.1">R</requirement> <requirement type="package" version="1.22.0">goseq</requirement> + <requirement type="package" version="1.22.0">bioconductor-goseq</requirement> </requirements> </xml> <xml name="stdio"> @@ -21,4 +22,4 @@ description="An undefined error occured, please check your input carefully and contact your administrator." /> </stdio> </xml> -</macros> \ No newline at end of file +</macros>
--- a/goseq.xml Mon Mar 07 14:35:53 2016 -0500 +++ b/goseq.xml Thu Mar 31 12:23:45 2016 -0400 @@ -70,8 +70,7 @@ <param name="length_file" value="gene_length.tab" ftype="tabular"/> <param name="category_file" value="category.tab" ftype="tabular"/> <param name="use_genes_without_cat" value="true" /> - <param name="p_adj_column" value="2" /> - <output name="wallenius_tab" file="wal.tab" compare="contains"/>/> + <output name="wallenius_tab" file="wal.tab" compare="re_match"/>/> </test> </tests> <help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/available_categories.loc Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,90 @@ +ENZYME ENZYME org.Ag.eg.db +GO2ALLEGS GO2ALLEGS org.Ag.eg.db +PATH PATH org.Ag.eg.db +PMID PMID org.Ag.eg.db +ENZYME ENZYME org.At.tair.db +GO2ALLTAIRS GO2ALLTAIRS org.At.tair.db +PATH PATH org.At.tair.db +PMID PMID org.At.tair.db +ENZYME ENZYME org.Bt.eg.db +GO2ALLEGS GO2ALLEGS org.Bt.eg.db +PATH PATH org.Bt.eg.db +PFAM PFAM org.Bt.eg.db +PMID PMID org.Bt.eg.db +PROSITE PROSITE org.Bt.eg.db +ENZYME ENZYME org.Ce.eg.db +GO2ALLEGS GO2ALLEGS org.Ce.eg.db +PATH PATH org.Ce.eg.db +PMID PMID org.Ce.eg.db +ENZYME ENZYME org.Cf.eg.db +GO2ALLEGS GO2ALLEGS org.Cf.eg.db +PATH PATH org.Cf.eg.db +PMID PMID org.Cf.eg.db +ENZYME ENZYME org.Dm.eg.db +GO2ALLEGS GO2ALLEGS org.Dm.eg.db +PATH PATH org.Dm.eg.db +PMID PMID org.Dm.eg.db +ENZYME ENZYME org.Dr.eg.db +GO2ALLEGS GO2ALLEGS org.Dr.eg.db +PATH PATH org.Dr.eg.db +PFAM PFAM org.Dr.eg.db +PMID PMID org.Dr.eg.db +PROSITE PROSITE org.Dr.eg.db +ENZYME ENZYME org.EcK12.eg.db +GO2ALLEGS GO2ALLEGS org.EcK12.eg.db +PATH PATH org.EcK12.eg.db +PMID PMID org.EcK12.eg.db +ENZYME ENZYME org.EcSakai.eg.db +GO2ALLEGS GO2ALLEGS org.EcSakai.eg.db +PATH PATH org.EcSakai.eg.db +PMID PMID org.EcSakai.eg.db +ENZYME ENZYME org.Gg.eg.db +GO2ALLEGS GO2ALLEGS org.Gg.eg.db +PATH PATH org.Gg.eg.db +PFAM PFAM org.Gg.eg.db +PMID PMID org.Gg.eg.db +PROSITE PROSITE org.Gg.eg.db +ENZYME ENZYME org.Hs.eg.db +GO2ALLEGS GO2ALLEGS org.Hs.eg.db +PATH PATH org.Hs.eg.db +PFAM PFAM org.Hs.eg.db +PMID PMID org.Hs.eg.db +PROSITE PROSITE org.Hs.eg.db +ENZYME ENZYME org.Mm.eg.db +GO2ALLEGS GO2ALLEGS org.Mm.eg.db +PATH PATH org.Mm.eg.db +PFAM PFAM org.Mm.eg.db +PMID PMID org.Mm.eg.db +PROSITE PROSITE org.Mm.eg.db +ENZYME ENZYME org.Mmu.eg.db +GO2ALLEGS GO2ALLEGS org.Mmu.eg.db +PATH PATH org.Mmu.eg.db +PMID PMID org.Mmu.eg.db +ENZYME ENZYME org.Pf.plasmo.db +GO2ALLORFS GO2ALLORFS org.Pf.plasmo.db +PATH PATH org.Pf.plasmo.db +ENZYME ENZYME 
org.Pt.eg.db +GO2ALLEGS GO2ALLEGS org.Pt.eg.db +PATH PATH org.Pt.eg.db +PMID PMID org.Pt.eg.db +ENZYME ENZYME org.Rn.eg.db +GO2ALLEGS GO2ALLEGS org.Rn.eg.db +PATH PATH org.Rn.eg.db +PFAM PFAM org.Rn.eg.db +PMID PMID org.Rn.eg.db +PROSITE PROSITE org.Rn.eg.db +ENZYME ENZYME org.Sc.sgd.db +GO2ALLORFS GO2ALLORFS org.Sc.sgd.db +PATH PATH org.Sc.sgd.db +PFAM PFAM org.Sc.sgd.db +PMID PMID org.Sc.sgd.db +ENZYME ENZYME org.Ss.eg.db +GO2ALLEGS GO2ALLEGS org.Ss.eg.db +PATH PATH org.Ss.eg.db +PMID PMID org.Ss.eg.db +GO2ALLEGS GO2ALLEGS org.Tgondii.eg.db +PMID PMID org.Tgondii.eg.db +ENZYME ENZYME org.Xl.eg.db +GO2ALLEGS GO2ALLEGS org.Xl.eg.db +PATH PATH org.Xl.eg.db +PMID PMID org.Xl.eg.db
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/available_identifiers.loc Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,137 @@ +ENTREZ ENTREZ org.Ag.eg.db +ENTREZ ENTREZ org.At.tair.db +ENTREZ ENTREZ org.Bt.eg.db +ENTREZ ENTREZ org.Ce.eg.db +ENTREZ ENTREZ org.Cf.eg.db +ENTREZ ENTREZ org.Dm.eg.db +ENTREZ ENTREZ org.Dr.eg.db +ENTREZ ENTREZ org.EcK12.eg.db +ENTREZ ENTREZ org.EcSakai.eg.db +ENTREZ ENTREZ org.Gg.eg.db +ENTREZ ENTREZ org.Hs.eg.db +ENTREZ ENTREZ org.Mm.eg.db +ENTREZ ENTREZ org.Mmu.eg.db +ENTREZ ENTREZ org.Pf.plasmo.db +ENTREZ ENTREZ org.Pt.eg.db +ENTREZ ENTREZ org.Rn.eg.db +ENTREZ ENTREZ org.Sc.sgd.db +ENTREZ ENTREZ org.Ss.eg.db +ENTREZ ENTREZ org.Tgondii.eg.db +ENTREZ ENTREZ org.Xl.eg.db +ACCNUM ACCNUM org.Ag.eg.db +ENSEMBL ENSEMBL org.Ag.eg.db +GENENAME GENENAME org.Ag.eg.db +REFSEQ REFSEQ org.Ag.eg.db +SYMBOL SYMBOL org.Ag.eg.db +UNIGENE UNIGENE org.Ag.eg.db +UNIPROT UNIPROT org.Ag.eg.db +GENENAME GENENAME org.At.tair.db +REFSEQ REFSEQ org.At.tair.db +SYMBOL SYMBOL org.At.tair.db +ACCNUM ACCNUM org.Bt.eg.db +ENSEMBL ENSEMBL org.Bt.eg.db +GENENAME GENENAME org.Bt.eg.db +REFSEQ REFSEQ org.Bt.eg.db +SYMBOL SYMBOL org.Bt.eg.db +UNIGENE UNIGENE org.Bt.eg.db +UNIPROT UNIPROT org.Bt.eg.db +ACCNUM ACCNUM org.Ce.eg.db +ENSEMBL ENSEMBL org.Ce.eg.db +GENENAME GENENAME org.Ce.eg.db +REFSEQ REFSEQ org.Ce.eg.db +SYMBOL SYMBOL org.Ce.eg.db +UNIGENE UNIGENE org.Ce.eg.db +UNIPROT UNIPROT org.Ce.eg.db +ACCNUM ACCNUM org.Cf.eg.db +ENSEMBL ENSEMBL org.Cf.eg.db +GENENAME GENENAME org.Cf.eg.db +REFSEQ REFSEQ org.Cf.eg.db +SYMBOL SYMBOL org.Cf.eg.db +UNIGENE UNIGENE org.Cf.eg.db +UNIPROT UNIPROT org.Cf.eg.db +ACCNUM ACCNUM org.Dm.eg.db +ENSEMBL ENSEMBL org.Dm.eg.db +FLYBASECG FLYBASECG org.Dm.eg.db +GENENAME GENENAME org.Dm.eg.db +REFSEQ REFSEQ org.Dm.eg.db +SYMBOL SYMBOL org.Dm.eg.db +UNIGENE UNIGENE org.Dm.eg.db +UNIPROT UNIPROT org.Dm.eg.db +ACCNUM ACCNUM org.Dr.eg.db +ENSEMBL ENSEMBL org.Dr.eg.db +GENENAME GENENAME org.Dr.eg.db +REFSEQ REFSEQ org.Dr.eg.db 
+SYMBOL SYMBOL org.Dr.eg.db +UNIGENE UNIGENE org.Dr.eg.db +UNIPROT UNIPROT org.Dr.eg.db +ACCNUM ACCNUM org.EcK12.eg.db +GENENAME GENENAME org.EcK12.eg.db +REFSEQ REFSEQ org.EcK12.eg.db +SYMBOL SYMBOL org.EcK12.eg.db +ACCNUM ACCNUM org.EcSakai.eg.db +GENENAME GENENAME org.EcSakai.eg.db +REFSEQ REFSEQ org.EcSakai.eg.db +SYMBOL SYMBOL org.EcSakai.eg.db +ACCNUM ACCNUM org.Gg.eg.db +ENSEMBL ENSEMBL org.Gg.eg.db +GENENAME GENENAME org.Gg.eg.db +REFSEQ REFSEQ org.Gg.eg.db +SYMBOL SYMBOL org.Gg.eg.db +UNIGENE UNIGENE org.Gg.eg.db +UNIPROT UNIPROT org.Gg.eg.db +ACCNUM ACCNUM org.Hs.eg.db +ENSEMBL ENSEMBL org.Hs.eg.db +GENENAME GENENAME org.Hs.eg.db +REFSEQ REFSEQ org.Hs.eg.db +SYMBOL SYMBOL org.Hs.eg.db +UNIGENE UNIGENE org.Hs.eg.db +UNIPROT UNIPROT org.Hs.eg.db +ACCNUM ACCNUM org.Mm.eg.db +ENSEMBL ENSEMBL org.Mm.eg.db +GENENAME GENENAME org.Mm.eg.db +REFSEQ REFSEQ org.Mm.eg.db +SYMBOL SYMBOL org.Mm.eg.db +UNIGENE UNIGENE org.Mm.eg.db +UNIPROT UNIPROT org.Mm.eg.db +ACCNUM ACCNUM org.Mmu.eg.db +ENSEMBL ENSEMBL org.Mmu.eg.db +GENENAME GENENAME org.Mmu.eg.db +REFSEQ REFSEQ org.Mmu.eg.db +SYMBOL SYMBOL org.Mmu.eg.db +UNIPROT UNIPROT org.Mmu.eg.db +GENENAME GENENAME org.Pf.plasmo.db +SYMBOL SYMBOL org.Pf.plasmo.db +ACCNUM ACCNUM org.Pt.eg.db +ENSEMBL ENSEMBL org.Pt.eg.db +GENENAME GENENAME org.Pt.eg.db +REFSEQ REFSEQ org.Pt.eg.db +SYMBOL SYMBOL org.Pt.eg.db +UNIPROT UNIPROT org.Pt.eg.db +ACCNUM ACCNUM org.Rn.eg.db +ENSEMBL ENSEMBL org.Rn.eg.db +GENENAME GENENAME org.Rn.eg.db +REFSEQ REFSEQ org.Rn.eg.db +SYMBOL SYMBOL org.Rn.eg.db +UNIGENE UNIGENE org.Rn.eg.db +UNIPROT UNIPROT org.Rn.eg.db +ENSEMBL ENSEMBL org.Sc.sgd.db +GENENAME GENENAME org.Sc.sgd.db +REFSEQ REFSEQ org.Sc.sgd.db +UNIPROT UNIPROT org.Sc.sgd.db +ACCNUM ACCNUM org.Ss.eg.db +GENENAME GENENAME org.Ss.eg.db +REFSEQ REFSEQ org.Ss.eg.db +SYMBOL SYMBOL org.Ss.eg.db +UNIGENE UNIGENE org.Ss.eg.db +UNIPROT UNIPROT org.Ss.eg.db +ACCNUM ACCNUM org.Tgondii.eg.db +GENENAME GENENAME org.Tgondii.eg.db +REFSEQ REFSEQ 
org.Tgondii.eg.db +SYMBOL SYMBOL org.Tgondii.eg.db +UNIGENE UNIGENE org.Tgondii.eg.db +ACCNUM ACCNUM org.Xl.eg.db +GENENAME GENENAME org.Xl.eg.db +REFSEQ REFSEQ org.Xl.eg.db +SYMBOL SYMBOL org.Xl.eg.db +UNIGENE UNIGENE org.Xl.eg.db +UNIPROT UNIPROT org.Xl.eg.db
--- a/test-data/gene_ids.loc Mon Mar 07 14:35:53 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -knownGene knownGene, UCSC Genes, NA, Entrez Gene ID -knownGeneOld3 knownGeneOld3, Old UCSC Genes, NA, -ccdsGene ccdsGene, CCDS, NA, -refGene refGene, RefSeq Genes, NA, Entrez Gene ID -xenoRefGene xenoRefGene, Other RefSeq, NA, -vegaGene vegaGene, Vega Genes, Vega Protein Genes, HAVANA Pseudogene ID -vegaPseudoGene vegaPseudoGene, Vega Genes, Vega Pseudogenes, HAVANA Pseudogene ID -ensGene ensGene, Ensembl Genes, NA, Ensembl gene ID -acembly acembly, AceView Genes, NA, -sibGene sibGene, SIB Genes, NA, -nscanPasaGene nscanPasaGene, N-SCAN, N-SCAN PASA-EST, -nscanGene nscanGene, N-SCAN, N-SCAN, -sgpGene sgpGene, SGP Genes, NA, -geneid geneid, Geneid Genes, NA, -genscan genscan, Genscan Genes, NA, -exoniphy exoniphy, Exoniphy, NA, -augustusHints augustusHints, Augustus, Augustus Hints, -augustusXRA augustusXRA, Augustus, Augustus De Novo, -augustusAbinitio augustusAbinitio, Augustus, Augustus Ab Initio, -acescan acescan, ACEScan, NA, -lincRNAsTranscripts lincRNAsTranscripts, lincRNAsTranscripts, NA, Name of gene -wgEncodeGencodeManualV3 wgEncodeGencodeManualV3, Gencode Genes, Gencode Manual, Ensembl gene ID -wgEncodeGencodeAutoV3 wgEncodeGencodeAutoV3, Gencode Genes, Gencode Auto, Ensembl gene ID -wgEncodeGencodePolyaV3 wgEncodeGencodePolyaV3, Gencode Genes, Gencode PolyA, Ensembl gene ID -wgEncodeGencodeBasicV17 wgEncodeGencodeBasicV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodeCompV17 wgEncodeGencodeCompV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV17 wgEncodeGencodePseudoGeneV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV17 wgEncodeGencode2wayConsPseudoV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodePolyaV17 wgEncodeGencodePolyaV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodeBasicV14 wgEncodeGencodeBasicV14, GENCODE Genes V14, NA, Ensembl gene ID 
-wgEncodeGencodeCompV14 wgEncodeGencodeCompV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV14 wgEncodeGencodePseudoGeneV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV14 wgEncodeGencode2wayConsPseudoV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodePolyaV14 wgEncodeGencodePolyaV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodeBasicV7 wgEncodeGencodeBasicV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodeCompV7 wgEncodeGencodeCompV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV7 wgEncodeGencodePseudoGeneV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV7 wgEncodeGencode2wayConsPseudoV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodePolyaV7 wgEncodeGencodePolyaV7, GENCODE Genes V7, NA, Ensembl gene ID -flyBaseGene flyBaseGene, FlyBase Genes, NA, Name of canonical transcript in cluster -sgdGene sgdGene, SGD Genes, NA, Name of canonical transcript in cluster -geneSymbol geneSymbol, refGene, refFlat, Gene Symbol
--- a/test-data/genomes.loc Mon Mar 07 14:35:53 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,170 +0,0 @@ -hg38 hg38, Human, Dec. 2013, Genome Reference Consortium GRCh38 -hg19 hg19, Human, Feb. 2009, Genome Reference Consortium GRCh37 -hg18 hg18, Human, Mar. 2006, NCBI Build 36.1 -hg17 hg17, Human, May 2004, NCBI Build 35 -hg16 hg16, Human, Jul. 2003, NCBI Build 34 -vicPac2 vicPac2, Alpaca, Mar. 2013, Broad Institute Vicugna_pacos-2.0.1 -vicPac1 vicPac1, Alpaca, Jul. 2008, Broad Institute VicPac1.0 -dasNov3 dasNov3, Armadillo, Dec. 2011, Broad Institute DasNov3 -otoGar3 otoGar3, Bushbaby, Mar. 2011, Broad Institute OtoGar3 -papHam1 papHam1, Baboon, Nov. 2008, Baylor College of Medicine HGSC Pham_1.0 -papAnu2 papAnu2, Baboon, Mar. 2012, Baylor College of Medicine Panu_2.0 -felCat8 felCat8, Cat, Nov. 2014, ICGSC Felis_catus_8.0 -felCat5 felCat5, Cat, Sep. 2011, ICGSC Felis_catus-6.2 -felCat4 felCat4, Cat, Dec. 2008, NHGRI catChrV17e -felCat3 felCat3, Cat, Mar. 2006, Broad Institute Release 3 -panTro4 panTro4, Chimp, Feb. 2011, CGSC Build 2.1.4 -panTro3 panTro3, Chimp, Oct. 2010, CGSC Build 2.1.3 -panTro2 panTro2, Chimp, Mar. 2006, CGSC Build 2.1 -panTro1 panTro1, Chimp, Nov. 2003, CGSC Build 1.1 -criGri1 criGri1, Chinese hamster, Jul. 2013, Beijing Genomics Institution-Shenzhen C_griseus_v1.0 -bosTau8 bosTau8, Cow, Jun. 2014, University of Maryland v3.1.1 -bosTau7 bosTau7, Cow, Oct. 2011, Baylor College of Medicine HGSC Btau_4.6.1 -bosTau6 bosTau6, Cow, Nov. 2009, University of Maryland v3.1 -bosTau4 bosTau4, Cow, Oct. 2007, Baylor College of Medicine HGSC Btau_4.0 -bosTau3 bosTau3, Cow, Aug. 2006, Baylor College of Medicine HGSC Btau_3.1 -bosTau2 bosTau2, Cow, Mar. 2005, Baylor College of Medicine HGSC Btau_2.0 -canFam3 canFam3, Dog, Sep. 2011, Broad Institute v3.1 -canFam2 canFam2, Dog, May 2005, Broad Institute v2.0 -canFam1 canFam1, Dog, Jul. 2004, Broad Institute v1.0 -turTru2 turTru2, Dolphin, Oct. 
2011, Baylor College of Medicine Ttru_1.4 -loxAfr3 loxAfr3, Elephant, Jul. 2009, Broad Institute LoxAfr3 -musFur1 musFur1, Ferret, Apr. 2011, Ferret Genome Sequencing Consortium MusPutFur1.0 -nomLeu3 nomLeu3, Gibbon, Oct. 2012, Gibbon Genome Sequencing Consortium Nleu3.0 -nomLeu2 nomLeu2, Gibbon, Jun. 2011, Gibbon Genome Sequencing Consortium Nleu1.1 -nomLeu1 nomLeu1, Gibbon, Jan. 2010, Gibbon Genome Sequencing Consortium Nleu1.0 -gorGor3 gorGor3, Gorilla, May 2011, Wellcome Trust Sanger Institute gorGor3.1 -cavPor3 cavPor3, Guinea pig, Feb. 2008, Broad Institute cavPor3 -eriEur2 eriEur2, Hedgehog, May 2012, Broad Institute EriEur2.0 -eriEur1 eriEur1, Hedgehog, Jun. 2006, Broad Institute Draft_v1 -equCab2 equCab2, Horse, Sep. 2007, Broad Institute EquCab2 -equCab1 equCab1, Horse, Jan. 2007, Broad Institute EquCab1 -dipOrd1 dipOrd1, Kangaroo rat, Jul. 2008, Baylor/Broad Institute DipOrd1.0 -triMan1 triMan1, Manatee, Oct. 2011, Broad Institute TriManLat1.0 -calJac3 calJac3, Marmoset, Mar. 2009, WUSTL Callithrix_jacchus-v3.2 -calJac1 calJac1, Marmoset, Jun. 2007, WUSTL Callithrix_jacchus-v2.0.2 -pteVam1 pteVam1, Megabat, Jul. 2008, Broad Institute Ptevap1.0 -myoLuc2 myoLuc2, Microbat, Jul. 2010, Broad Institute MyoLuc2.0 -balAcu1 balAcu1, Minke whale, Oct. 2013, KORDI BalAcu1.0 -mm10 mm10, Mouse, Dec. 2011, Genome Reference Consortium GRCm38 -mm9 mm9, Mouse, Jul. 2007, NCBI Build 37 -mm8 mm8, Mouse, Feb. 2006, NCBI Build 36 -mm7 mm7, Mouse, Aug. 2005, NCBI Build 35 -micMur2 micMur2, Mouse lemur, May 2015, Baylor/Broad Institute Mmur_2.0 -micMur1 micMur1, Mouse lemur, Jul. 2007, Broad Institute MicMur1.0 -hetGla2 hetGla2, Naked mole-rat, Jan. 2012, Broad Institute HetGla_female_1.0 -hetGla1 hetGla1, Naked mole-rat, Jul. 2011, Beijing Genomics Institute HetGla_1.0 -monDom5 monDom5, Opossum, Oct. 2006, Broad Institute release MonDom5 -monDom4 monDom4, Opossum, Jan. 2006, Broad Institute release MonDom4 -monDom1 monDom1, Opossum, Oct. 
2004, Broad Institute release MonDom1 -ponAbe2 ponAbe2, Orangutan, Jul. 2007, WUSTL Pongo_albelii-2.0.2 -ailMel1 ailMel1, Panda, Dec. 2009, BGI-Shenzhen AilMel 1.0 -susScr3 susScr3, Pig, Aug. 2011, Swine Genome Sequencing Consortium Sscrofa10.2 -susScr2 susScr2, Pig, Nov. 2009, Swine Genome Sequencing Consortium Sscrofa9.2 -ochPri3 ochPri3, Pika, May 2012, Broad Institute OchPri3.0 -ochPri2 ochPri2, Pika, Jul. 2008, Broad Institute OchPri2 -ornAna2 ornAna2, Platypus, Feb. 2007, WUSTL v5.0.1 -ornAna1 ornAna1, Platypus, Mar. 2007, WUSTL v5.0.1 -oryCun2 oryCun2, Rabbit, Apr. 2009, Broad Institute release OryCun2 -rn6 rn6, Rat, Jul. 2014, RGSC Rnor_6.0 -rn5 rn5, Rat, Mar. 2012, RGSC Rnor_5.0 -rn4 rn4, Rat, Nov. 2004, Baylor College of Medicine HGSC v3.4 -rn3 rn3, Rat, Jun. 2003, Baylor College of Medicine HGSC v3.1 -rheMac3 rheMac3, Rhesus, Oct. 2010, Beijing Genomics Institute CR_1.0 -rheMac2 rheMac2, Rhesus, Jan. 2006, Baylor College of Medicine HGSC v1.0 Mmul_051212 -proCap1 proCap1, Rock hyrax, Jul. 2008, Baylor College of Medicine HGSC Procap1.0 -oviAri3 oviAri3, Sheep, Aug. 2012, ISGC Oar_v3.1 -oviAri1 oviAri1, Sheep, Feb. 2010, ISGC Ovis aries 1.0 -sorAra2 sorAra2, Shrew, Aug. 2008, Broad Institute SorAra2.0 -sorAra1 sorAra1, Shrew, Jun. 2006, Broad Institute SorAra1.0 -choHof1 choHof1, Sloth, Jul. 2008, Broad Institute ChoHof1.0 -speTri2 speTri2, Squirrel, Nov. 2011, Broad Institute SpeTri2.0 -saiBol1 saiBol1, Squirrel monkey, Oct. 2011, Broad Institute SaiBol1.0 -tarSyr2 tarSyr2, Tarsier, Sep. 2013, WashU Tarsius_syrichta-2.0.1 -tarSyr1 tarSyr1, Tarsier, Aug. 2008, WUSTL/Broad Institute Tarsyr1.0 -sarHar1 sarHar1, Tasmanian devil, Feb. 2011, Wellcome Trust Sanger Institute Devil_refv7.0 -echTel2 echTel2, Tenrec, Nov. 2012, Broad Institute EchTel2.0 -echTel1 echTel1, Tenrec, Jul. 2005, Broad Institute echTel1 -tupBel1 tupBel1, Tree shrew, Dec. 2006, Broad Institute Tupbel1.0 -macEug2 macEug2, Wallaby, Sep. 
2009, Tammar Wallaby Genome Sequencing Consortium Meug_1.1 -cerSim1 cerSim1, White rhinoceros, May 2012, Broad Institute CerSimSim1.0 -allMis1 allMis1, American alligator, Aug. 2012, Int. Crocodilian Genomes Working Group allMis0.2 -gadMor1 gadMor1, Atlantic cod, May 2010, Genofisk GadMor_May2010 -melUnd1 melUnd1, Budgerigar, Sep. 2011, WUSTL v6.3 -galGal4 galGal4, Chicken, Nov. -2011, ICGC Gallus-gallus-4.0 -galGal3 galGal3, Chicken, May 2006, WUSTL Gallus-gallus-2.1 -galGal2 galGal2, Chicken, Feb. 2004, WUSTL Gallus-gallus-1.0 -latCha1 latCha1, Coelacanth, Aug. 2011, Broad Institute LatCha1 -calMil1 calMil1, Elephant shark, Dec. 2013, IMCB Callorhinchus_milli_6.1.3 -fr3 fr3, Fugu, Oct. 2011, JGI v5.0 -fr2 fr2, Fugu, Oct. 2004, JGI v4.0 -fr1 fr1, Fugu, Aug. 2002, JGI v3.0 -petMar2 petMar2, Lamprey, Sep. 2010, WUGSC 7.0 -petMar1 petMar1, Lamprey, Mar. 2007, WUSTL v3.0 -anoCar2 anoCar2, Lizard, May 2010, Broad Institute AnoCar2 -anoCar1 anoCar1, Lizard, Feb. 2007, Broad Institute AnoCar1 -oryLat2 oryLat2, Medaka, Oct. 2005, NIG v1.0 -geoFor1 geoFor1, Medium ground finch, Apr. 2012, BGI GeoFor_1.0 / NCBI 13302 -oreNil2 oreNil2, Nile tilapia, Jan. 2011, Broad Institute Release OreNil1.1 -chrPic1 chrPic1, Painted turtle, Dec. 2011, IPTGSC Chrysemys_picta_bellii-3.0.1 -gasAcu1 gasAcu1, Stickleback, Feb. 2006, Broad Institute Release 1.0 -tetNig2 tetNig2, Tetraodon, Mar. 2007, Genoscope v7 -tetNig1 tetNig1, Tetraodon, Feb. 2004, Genoscope v7 -melGal1 melGal1, Turkey, Dec. 2009, Turkey Genome Consortium v2.01 -xenTro7 xenTro7, X. tropicalis, Sep. 2012, JGI v.7.0 -xenTro3 xenTro3, X. tropicalis, Nov. 2009, JGI v.4.2 -xenTro2 xenTro2, X. tropicalis, Aug. 2005, JGI v.4.1 -xenTro1 xenTro1, X. tropicalis, Oct. 2004, JGI v.3.0 -taeGut2 taeGut2, Zebra finch, Feb. 2013, WashU taeGut324 -taeGut1 taeGut1, Zebra finch, Jul. 2008, WUSTL v3.2.4 -danRer10 danRer10, Zebrafish, Sep. 2014, Genome Reference Consortium GRCz10 -danRer7 danRer7, Zebrafish, Jul. 
2010, Sanger Institute Zv9 -danRer6 danRer6, Zebrafish, Dec. 2008, Sanger Institute Zv8 -danRer5 danRer5, Zebrafish, Jul. 2007, Sanger Institute Zv7 -danRer4 danRer4, Zebrafish, Mar. 2006, Sanger Institute Zv6 -danRer3 danRer3, Zebrafish, May 2005, Sanger Institute Zv5 -ci2 ci2, C. intestinalis, Mar. 2005, JGI v2.0 -ci1 ci1, C. intestinalis, Dec. 2002, JGI v1.0 -braFlo1 braFlo1, Lancelet, Mar. 2006, JGI v1.0 -strPur2 strPur2, S. purpuratus, Sep. 2006, Baylor College of Medicine HGSC v. Spur 2.1 -strPur1 strPur1, S. purpuratus, Apr. 2005, Baylor College of Medicine HGSC v. Spur_0.5 -apiMel2 apiMel2, A. mellifera, Jan. 2005, Baylor College of Medicine HGSC v.Amel_2.0 -apiMel1 apiMel1, A. mellifera, Jul. 2004, Baylor College of Medicine HGSC v.Amel_1.2 -anoGam1 anoGam1, A. gambiae, Feb. 2003, IAGP v.MOZ2 -droAna2 droAna2, D. ananassae, Aug. 2005, Agencourt Arachne release -droAna1 droAna1, D. ananassae, Jul. 2004, TIGR Celera release -droEre1 droEre1, D. erecta, Aug. 2005, Agencourt Arachne release -droGri1 droGri1, D. grimshawi, Aug. 2005, Agencourt Arachne release -dm6 dm6, D. melanogaster, Aug. 2014, BDGP Release 6 + ISO1 MT -dm3 dm3, D. melanogaster, Apr. 2006, BDGP Release 5 -dm2 dm2, D. melanogaster, Apr. 2004, BDGP Release 4 -dm1 dm1, D. melanogaster, Jan. 2003, BDGP Release 3 -droMoj2 droMoj2, D. mojavensis, Aug. 2005, Agencourt Arachne release -droMoj1 droMoj1, D. mojavensis, Aug. 2004, Agencourt Arachne release -droPer1 droPer1, D. persimilis, Oct. 2005, Broad Institute release -dp3 dp3, D. pseudoobscura, Nov. 2004, Flybase Release 1.0 -dp2 dp2, D. pseudoobscura, Aug. 2003, Baylor College of Medicine HGSC Freeze 1 -droSec1 droSec1, D. sechellia, Oct. 2005, Broad Institute Release 1.0 -droSim1 droSim1, D. simulans, Apr. 2005, WUSTL Release 1.0 -droVir2 droVir2, D. virilis, Aug. 2005, Agencourt Arachne release -droVir1 droVir1, D. virilis, Jul. 2004, Agencourt Arachne release -droYak2 droYak2, D. yakuba, Nov. 2005, WUSTL Release 2.0 -droYak1 droYak1, D. 
yakuba, Apr. 2004, WUSTL Release 1.0 -caePb2 caePb2, C. brenneri, Feb. 2008, WUSTL 6.0.1 -caePb1 caePb1, C. brenneri, Jan. 2007, WUSTL 4.0 -cb3 cb3, C. briggsae, Jan. 2007, WUSTL Cb3 -cb1 cb1, C. briggsae, Jul. 2002, WormBase v. cb25.agp8 -ce10 ce10, C. elegans, Oct. 2010, WormBase v. WS220 -ce6 ce6, C. elegans, May 2008, WormBase v. WS190 -ce4 ce4, C. elegans, Jan. 2007, WormBase v. WS170 -ce2 ce2, C. elegans, Mar. 2004, WormBase v. WS120 -caeJap1 caeJap1, C. japonica, Mar. 2008, WUSTL 3.0.2 -caeRem3 caeRem3, C. remanei, May 2007, WUSTL 15.0.1 -caeRem2 caeRem2, C. remanei, Mar. 2006, WUSTL 1.0 -priPac1 priPac1, P. pacificus, Feb. 2007, WUSTL 5.0 -aplCal1 aplCal1, Sea Hare, Sep. 2008, Broad Release Aplcal2.0 -sacCer3 sacCer3, Yeast, April 2011, SGD April 2011 sequence -sacCer2 sacCer2, Yeast, June 2008, SGD June 2008 sequence -sacCer1 sacCer1, Yeast, Oct. 2003, SGD 1 Oct 2003 sequence -eboVir3 eboVir3, Ebola Virus, June 2014, Sierra Leone 2014 (G3683/KM034562.1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/org_name.loc Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,20 @@ +Anopheles gambiae (org.Ag.eg.db) org.Ag.eg.db +Arabidopsis thaliana (org.At.tair.db) org.At.tair.db +Bos taurus (org.Bt.eg.db) org.Bt.eg.db +Caenorhabditis elegans (org.Ce.eg.db) org.Ce.eg.db +Canis familiaris (org.Cf.eg.db) org.Cf.eg.db +Drosophila melanogaster (org.Dm.eg.db) org.Dm.eg.db +Danio rerio (org.Dr.eg.db) org.Dr.eg.db +Escherichia coli (org.EcK12.eg.db) org.EcK12.eg.db +Escherichia coli (org.EcSakai.eg.db) org.EcSakai.eg.db +Gallus gallus (org.Gg.eg.db) org.Gg.eg.db +Homo sapiens (org.Hs.eg.db) org.Hs.eg.db +Mus musculus (org.Mm.eg.db) org.Mm.eg.db +Macaca mulatta (org.Mmu.eg.db) org.Mmu.eg.db +Plasmodium falciparum (org.Pf.plasmo.db) org.Pf.plasmo.db +Pan troglodytes (org.Pt.eg.db) org.Pt.eg.db +Rattus norvegicus (org.Rn.eg.db) org.Rn.eg.db +Saccharomyces cerevisiae (org.Sc.sgd.db) org.Sc.sgd.db +Sus scrofa (org.Ss.eg.db) org.Ss.eg.db +Toxoplasma gondii (org.Tgondii.eg.db) org.Tgondii.eg.db +Xenopus laevis (org.Xl.eg.db) org.Xl.eg.db
--- a/test-data/tool_data_table_conf.xml Mon Mar 07 14:35:53 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -<tables> - <!-- Location of Picard dict file and other files --> - <table name="go_genomes" comment_char="#"> - <columns>value, name</columns> - <file path="${__HERE__}/test-data/genomes.loc" /> - </table> - <table name="go_gene_ids" comment_char="#"> - <columns>value, name</columns> - <file path="${__HERE__}/test-data/gene_ids.loc" /> - </table> -</tables> \ No newline at end of file
--- a/test-data/wal.tab Mon Mar 07 14:35:53 2016 -0500 +++ b/test-data/wal.tab Thu Mar 31 12:23:45 2016 -0400 @@ -1,3 +1,3 @@ -category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p.adjust.over_represented p.adjust.under_represented -GO:0000278 0.0122606865510724 0.999300084010281 4 5 mitotic cell cycle BP 0.0245213731021448 0.999300084010281 -GO:0000003 1 0.796172371987733 0 1 reproduction BP 1 0.999300084010281 +category over_represented_pvalue under_represented_pvalue numDEInCat numInCat term ontology p\.adjust.over_represented p\.adjust.under_represented +GO:0000278 0\.0122.+ 0\.999.+ 4 5 mitotic cell cycle BP 0\.0245.+ 0\.999.+ +GO:0000003 1 0\.796.+ 0 1 reproduction BP 1 0\.999.+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/available_categories.loc.sample Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,90 @@ +ENZYME ENZYME org.Ag.eg.db +GO2ALLEGS GO2ALLEGS org.Ag.eg.db +PATH PATH org.Ag.eg.db +PMID PMID org.Ag.eg.db +ENZYME ENZYME org.At.tair.db +GO2ALLTAIRS GO2ALLTAIRS org.At.tair.db +PATH PATH org.At.tair.db +PMID PMID org.At.tair.db +ENZYME ENZYME org.Bt.eg.db +GO2ALLEGS GO2ALLEGS org.Bt.eg.db +PATH PATH org.Bt.eg.db +PFAM PFAM org.Bt.eg.db +PMID PMID org.Bt.eg.db +PROSITE PROSITE org.Bt.eg.db +ENZYME ENZYME org.Ce.eg.db +GO2ALLEGS GO2ALLEGS org.Ce.eg.db +PATH PATH org.Ce.eg.db +PMID PMID org.Ce.eg.db +ENZYME ENZYME org.Cf.eg.db +GO2ALLEGS GO2ALLEGS org.Cf.eg.db +PATH PATH org.Cf.eg.db +PMID PMID org.Cf.eg.db +ENZYME ENZYME org.Dm.eg.db +GO2ALLEGS GO2ALLEGS org.Dm.eg.db +PATH PATH org.Dm.eg.db +PMID PMID org.Dm.eg.db +ENZYME ENZYME org.Dr.eg.db +GO2ALLEGS GO2ALLEGS org.Dr.eg.db +PATH PATH org.Dr.eg.db +PFAM PFAM org.Dr.eg.db +PMID PMID org.Dr.eg.db +PROSITE PROSITE org.Dr.eg.db +ENZYME ENZYME org.EcK12.eg.db +GO2ALLEGS GO2ALLEGS org.EcK12.eg.db +PATH PATH org.EcK12.eg.db +PMID PMID org.EcK12.eg.db +ENZYME ENZYME org.EcSakai.eg.db +GO2ALLEGS GO2ALLEGS org.EcSakai.eg.db +PATH PATH org.EcSakai.eg.db +PMID PMID org.EcSakai.eg.db +ENZYME ENZYME org.Gg.eg.db +GO2ALLEGS GO2ALLEGS org.Gg.eg.db +PATH PATH org.Gg.eg.db +PFAM PFAM org.Gg.eg.db +PMID PMID org.Gg.eg.db +PROSITE PROSITE org.Gg.eg.db +ENZYME ENZYME org.Hs.eg.db +GO2ALLEGS GO2ALLEGS org.Hs.eg.db +PATH PATH org.Hs.eg.db +PFAM PFAM org.Hs.eg.db +PMID PMID org.Hs.eg.db +PROSITE PROSITE org.Hs.eg.db +ENZYME ENZYME org.Mm.eg.db +GO2ALLEGS GO2ALLEGS org.Mm.eg.db +PATH PATH org.Mm.eg.db +PFAM PFAM org.Mm.eg.db +PMID PMID org.Mm.eg.db +PROSITE PROSITE org.Mm.eg.db +ENZYME ENZYME org.Mmu.eg.db +GO2ALLEGS GO2ALLEGS org.Mmu.eg.db +PATH PATH org.Mmu.eg.db +PMID PMID org.Mmu.eg.db +ENZYME ENZYME org.Pf.plasmo.db +GO2ALLORFS GO2ALLORFS org.Pf.plasmo.db +PATH PATH org.Pf.plasmo.db +ENZYME 
ENZYME org.Pt.eg.db +GO2ALLEGS GO2ALLEGS org.Pt.eg.db +PATH PATH org.Pt.eg.db +PMID PMID org.Pt.eg.db +ENZYME ENZYME org.Rn.eg.db +GO2ALLEGS GO2ALLEGS org.Rn.eg.db +PATH PATH org.Rn.eg.db +PFAM PFAM org.Rn.eg.db +PMID PMID org.Rn.eg.db +PROSITE PROSITE org.Rn.eg.db +ENZYME ENZYME org.Sc.sgd.db +GO2ALLORFS GO2ALLORFS org.Sc.sgd.db +PATH PATH org.Sc.sgd.db +PFAM PFAM org.Sc.sgd.db +PMID PMID org.Sc.sgd.db +ENZYME ENZYME org.Ss.eg.db +GO2ALLEGS GO2ALLEGS org.Ss.eg.db +PATH PATH org.Ss.eg.db +PMID PMID org.Ss.eg.db +GO2ALLEGS GO2ALLEGS org.Tgondii.eg.db +PMID PMID org.Tgondii.eg.db +ENZYME ENZYME org.Xl.eg.db +GO2ALLEGS GO2ALLEGS org.Xl.eg.db +PATH PATH org.Xl.eg.db +PMID PMID org.Xl.eg.db
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/available_identifiers.loc.sample Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,137 @@ +ENTREZ ENTREZ org.Ag.eg.db +ENTREZ ENTREZ org.At.tair.db +ENTREZ ENTREZ org.Bt.eg.db +ENTREZ ENTREZ org.Ce.eg.db +ENTREZ ENTREZ org.Cf.eg.db +ENTREZ ENTREZ org.Dm.eg.db +ENTREZ ENTREZ org.Dr.eg.db +ENTREZ ENTREZ org.EcK12.eg.db +ENTREZ ENTREZ org.EcSakai.eg.db +ENTREZ ENTREZ org.Gg.eg.db +ENTREZ ENTREZ org.Hs.eg.db +ENTREZ ENTREZ org.Mm.eg.db +ENTREZ ENTREZ org.Mmu.eg.db +ENTREZ ENTREZ org.Pf.plasmo.db +ENTREZ ENTREZ org.Pt.eg.db +ENTREZ ENTREZ org.Rn.eg.db +ENTREZ ENTREZ org.Sc.sgd.db +ENTREZ ENTREZ org.Ss.eg.db +ENTREZ ENTREZ org.Tgondii.eg.db +ENTREZ ENTREZ org.Xl.eg.db +ACCNUM ACCNUM org.Ag.eg.db +ENSEMBL ENSEMBL org.Ag.eg.db +GENENAME GENENAME org.Ag.eg.db +REFSEQ REFSEQ org.Ag.eg.db +SYMBOL SYMBOL org.Ag.eg.db +UNIGENE UNIGENE org.Ag.eg.db +UNIPROT UNIPROT org.Ag.eg.db +GENENAME GENENAME org.At.tair.db +REFSEQ REFSEQ org.At.tair.db +SYMBOL SYMBOL org.At.tair.db +ACCNUM ACCNUM org.Bt.eg.db +ENSEMBL ENSEMBL org.Bt.eg.db +GENENAME GENENAME org.Bt.eg.db +REFSEQ REFSEQ org.Bt.eg.db +SYMBOL SYMBOL org.Bt.eg.db +UNIGENE UNIGENE org.Bt.eg.db +UNIPROT UNIPROT org.Bt.eg.db +ACCNUM ACCNUM org.Ce.eg.db +ENSEMBL ENSEMBL org.Ce.eg.db +GENENAME GENENAME org.Ce.eg.db +REFSEQ REFSEQ org.Ce.eg.db +SYMBOL SYMBOL org.Ce.eg.db +UNIGENE UNIGENE org.Ce.eg.db +UNIPROT UNIPROT org.Ce.eg.db +ACCNUM ACCNUM org.Cf.eg.db +ENSEMBL ENSEMBL org.Cf.eg.db +GENENAME GENENAME org.Cf.eg.db +REFSEQ REFSEQ org.Cf.eg.db +SYMBOL SYMBOL org.Cf.eg.db +UNIGENE UNIGENE org.Cf.eg.db +UNIPROT UNIPROT org.Cf.eg.db +ACCNUM ACCNUM org.Dm.eg.db +ENSEMBL ENSEMBL org.Dm.eg.db +FLYBASECG FLYBASECG org.Dm.eg.db +GENENAME GENENAME org.Dm.eg.db +REFSEQ REFSEQ org.Dm.eg.db +SYMBOL SYMBOL org.Dm.eg.db +UNIGENE UNIGENE org.Dm.eg.db +UNIPROT UNIPROT org.Dm.eg.db +ACCNUM ACCNUM org.Dr.eg.db +ENSEMBL ENSEMBL org.Dr.eg.db +GENENAME GENENAME org.Dr.eg.db +REFSEQ REFSEQ org.Dr.eg.db 
+SYMBOL SYMBOL org.Dr.eg.db +UNIGENE UNIGENE org.Dr.eg.db +UNIPROT UNIPROT org.Dr.eg.db +ACCNUM ACCNUM org.EcK12.eg.db +GENENAME GENENAME org.EcK12.eg.db +REFSEQ REFSEQ org.EcK12.eg.db +SYMBOL SYMBOL org.EcK12.eg.db +ACCNUM ACCNUM org.EcSakai.eg.db +GENENAME GENENAME org.EcSakai.eg.db +REFSEQ REFSEQ org.EcSakai.eg.db +SYMBOL SYMBOL org.EcSakai.eg.db +ACCNUM ACCNUM org.Gg.eg.db +ENSEMBL ENSEMBL org.Gg.eg.db +GENENAME GENENAME org.Gg.eg.db +REFSEQ REFSEQ org.Gg.eg.db +SYMBOL SYMBOL org.Gg.eg.db +UNIGENE UNIGENE org.Gg.eg.db +UNIPROT UNIPROT org.Gg.eg.db +ACCNUM ACCNUM org.Hs.eg.db +ENSEMBL ENSEMBL org.Hs.eg.db +GENENAME GENENAME org.Hs.eg.db +REFSEQ REFSEQ org.Hs.eg.db +SYMBOL SYMBOL org.Hs.eg.db +UNIGENE UNIGENE org.Hs.eg.db +UNIPROT UNIPROT org.Hs.eg.db +ACCNUM ACCNUM org.Mm.eg.db +ENSEMBL ENSEMBL org.Mm.eg.db +GENENAME GENENAME org.Mm.eg.db +REFSEQ REFSEQ org.Mm.eg.db +SYMBOL SYMBOL org.Mm.eg.db +UNIGENE UNIGENE org.Mm.eg.db +UNIPROT UNIPROT org.Mm.eg.db +ACCNUM ACCNUM org.Mmu.eg.db +ENSEMBL ENSEMBL org.Mmu.eg.db +GENENAME GENENAME org.Mmu.eg.db +REFSEQ REFSEQ org.Mmu.eg.db +SYMBOL SYMBOL org.Mmu.eg.db +UNIPROT UNIPROT org.Mmu.eg.db +GENENAME GENENAME org.Pf.plasmo.db +SYMBOL SYMBOL org.Pf.plasmo.db +ACCNUM ACCNUM org.Pt.eg.db +ENSEMBL ENSEMBL org.Pt.eg.db +GENENAME GENENAME org.Pt.eg.db +REFSEQ REFSEQ org.Pt.eg.db +SYMBOL SYMBOL org.Pt.eg.db +UNIPROT UNIPROT org.Pt.eg.db +ACCNUM ACCNUM org.Rn.eg.db +ENSEMBL ENSEMBL org.Rn.eg.db +GENENAME GENENAME org.Rn.eg.db +REFSEQ REFSEQ org.Rn.eg.db +SYMBOL SYMBOL org.Rn.eg.db +UNIGENE UNIGENE org.Rn.eg.db +UNIPROT UNIPROT org.Rn.eg.db +ENSEMBL ENSEMBL org.Sc.sgd.db +GENENAME GENENAME org.Sc.sgd.db +REFSEQ REFSEQ org.Sc.sgd.db +UNIPROT UNIPROT org.Sc.sgd.db +ACCNUM ACCNUM org.Ss.eg.db +GENENAME GENENAME org.Ss.eg.db +REFSEQ REFSEQ org.Ss.eg.db +SYMBOL SYMBOL org.Ss.eg.db +UNIGENE UNIGENE org.Ss.eg.db +UNIPROT UNIPROT org.Ss.eg.db +ACCNUM ACCNUM org.Tgondii.eg.db +GENENAME GENENAME org.Tgondii.eg.db +REFSEQ REFSEQ 
org.Tgondii.eg.db +SYMBOL SYMBOL org.Tgondii.eg.db +UNIGENE UNIGENE org.Tgondii.eg.db +ACCNUM ACCNUM org.Xl.eg.db +GENENAME GENENAME org.Xl.eg.db +REFSEQ REFSEQ org.Xl.eg.db +SYMBOL SYMBOL org.Xl.eg.db +UNIGENE UNIGENE org.Xl.eg.db +UNIPROT UNIPROT org.Xl.eg.db
--- a/tool-data/gene_ids.loc.sample Mon Mar 07 14:35:53 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -knownGene knownGene, UCSC Genes, NA, Entrez Gene ID -knownGeneOld3 knownGeneOld3, Old UCSC Genes, NA, -ccdsGene ccdsGene, CCDS, NA, -refGene refGene, RefSeq Genes, NA, Entrez Gene ID -xenoRefGene xenoRefGene, Other RefSeq, NA, -vegaGene vegaGene, Vega Genes, Vega Protein Genes, HAVANA Pseudogene ID -vegaPseudoGene vegaPseudoGene, Vega Genes, Vega Pseudogenes, HAVANA Pseudogene ID -ensGene ensGene, Ensembl Genes, NA, Ensembl gene ID -acembly acembly, AceView Genes, NA, -sibGene sibGene, SIB Genes, NA, -nscanPasaGene nscanPasaGene, N-SCAN, N-SCAN PASA-EST, -nscanGene nscanGene, N-SCAN, N-SCAN, -sgpGene sgpGene, SGP Genes, NA, -geneid geneid, Geneid Genes, NA, -genscan genscan, Genscan Genes, NA, -exoniphy exoniphy, Exoniphy, NA, -augustusHints augustusHints, Augustus, Augustus Hints, -augustusXRA augustusXRA, Augustus, Augustus De Novo, -augustusAbinitio augustusAbinitio, Augustus, Augustus Ab Initio, -acescan acescan, ACEScan, NA, -lincRNAsTranscripts lincRNAsTranscripts, lincRNAsTranscripts, NA, Name of gene -wgEncodeGencodeManualV3 wgEncodeGencodeManualV3, Gencode Genes, Gencode Manual, Ensembl gene ID -wgEncodeGencodeAutoV3 wgEncodeGencodeAutoV3, Gencode Genes, Gencode Auto, Ensembl gene ID -wgEncodeGencodePolyaV3 wgEncodeGencodePolyaV3, Gencode Genes, Gencode PolyA, Ensembl gene ID -wgEncodeGencodeBasicV17 wgEncodeGencodeBasicV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodeCompV17 wgEncodeGencodeCompV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV17 wgEncodeGencodePseudoGeneV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV17 wgEncodeGencode2wayConsPseudoV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodePolyaV17 wgEncodeGencodePolyaV17, GENCODE Genes V17, NA, Ensembl gene ID -wgEncodeGencodeBasicV14 wgEncodeGencodeBasicV14, GENCODE Genes V14, NA, Ensembl gene ID 
-wgEncodeGencodeCompV14 wgEncodeGencodeCompV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV14 wgEncodeGencodePseudoGeneV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV14 wgEncodeGencode2wayConsPseudoV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodePolyaV14 wgEncodeGencodePolyaV14, GENCODE Genes V14, NA, Ensembl gene ID -wgEncodeGencodeBasicV7 wgEncodeGencodeBasicV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodeCompV7 wgEncodeGencodeCompV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodePseudoGeneV7 wgEncodeGencodePseudoGeneV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencode2wayConsPseudoV7 wgEncodeGencode2wayConsPseudoV7, GENCODE Genes V7, NA, Ensembl gene ID -wgEncodeGencodePolyaV7 wgEncodeGencodePolyaV7, GENCODE Genes V7, NA, Ensembl gene ID -flyBaseGene flyBaseGene, FlyBase Genes, NA, Name of canonical transcript in cluster -sgdGene sgdGene, SGD Genes, NA, Name of canonical transcript in cluster -geneSymbol geneSymbol, refGene, refFlat, Gene Symbol
--- a/tool-data/genomes.loc.sample Mon Mar 07 14:35:53 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,170 +0,0 @@ -hg38 hg38, Human, Dec. 2013, Genome Reference Consortium GRCh38 -hg19 hg19, Human, Feb. 2009, Genome Reference Consortium GRCh37 -hg18 hg18, Human, Mar. 2006, NCBI Build 36.1 -hg17 hg17, Human, May 2004, NCBI Build 35 -hg16 hg16, Human, Jul. 2003, NCBI Build 34 -vicPac2 vicPac2, Alpaca, Mar. 2013, Broad Institute Vicugna_pacos-2.0.1 -vicPac1 vicPac1, Alpaca, Jul. 2008, Broad Institute VicPac1.0 -dasNov3 dasNov3, Armadillo, Dec. 2011, Broad Institute DasNov3 -otoGar3 otoGar3, Bushbaby, Mar. 2011, Broad Institute OtoGar3 -papHam1 papHam1, Baboon, Nov. 2008, Baylor College of Medicine HGSC Pham_1.0 -papAnu2 papAnu2, Baboon, Mar. 2012, Baylor College of Medicine Panu_2.0 -felCat8 felCat8, Cat, Nov. 2014, ICGSC Felis_catus_8.0 -felCat5 felCat5, Cat, Sep. 2011, ICGSC Felis_catus-6.2 -felCat4 felCat4, Cat, Dec. 2008, NHGRI catChrV17e -felCat3 felCat3, Cat, Mar. 2006, Broad Institute Release 3 -panTro4 panTro4, Chimp, Feb. 2011, CGSC Build 2.1.4 -panTro3 panTro3, Chimp, Oct. 2010, CGSC Build 2.1.3 -panTro2 panTro2, Chimp, Mar. 2006, CGSC Build 2.1 -panTro1 panTro1, Chimp, Nov. 2003, CGSC Build 1.1 -criGri1 criGri1, Chinese hamster, Jul. 2013, Beijing Genomics Institution-Shenzhen C_griseus_v1.0 -bosTau8 bosTau8, Cow, Jun. 2014, University of Maryland v3.1.1 -bosTau7 bosTau7, Cow, Oct. 2011, Baylor College of Medicine HGSC Btau_4.6.1 -bosTau6 bosTau6, Cow, Nov. 2009, University of Maryland v3.1 -bosTau4 bosTau4, Cow, Oct. 2007, Baylor College of Medicine HGSC Btau_4.0 -bosTau3 bosTau3, Cow, Aug. 2006, Baylor College of Medicine HGSC Btau_3.1 -bosTau2 bosTau2, Cow, Mar. 2005, Baylor College of Medicine HGSC Btau_2.0 -canFam3 canFam3, Dog, Sep. 2011, Broad Institute v3.1 -canFam2 canFam2, Dog, May 2005, Broad Institute v2.0 -canFam1 canFam1, Dog, Jul. 2004, Broad Institute v1.0 -turTru2 turTru2, Dolphin, Oct. 
2011, Baylor College of Medicine Ttru_1.4 -loxAfr3 loxAfr3, Elephant, Jul. 2009, Broad Institute LoxAfr3 -musFur1 musFur1, Ferret, Apr. 2011, Ferret Genome Sequencing Consortium MusPutFur1.0 -nomLeu3 nomLeu3, Gibbon, Oct. 2012, Gibbon Genome Sequencing Consortium Nleu3.0 -nomLeu2 nomLeu2, Gibbon, Jun. 2011, Gibbon Genome Sequencing Consortium Nleu1.1 -nomLeu1 nomLeu1, Gibbon, Jan. 2010, Gibbon Genome Sequencing Consortium Nleu1.0 -gorGor3 gorGor3, Gorilla, May 2011, Wellcome Trust Sanger Institute gorGor3.1 -cavPor3 cavPor3, Guinea pig, Feb. 2008, Broad Institute cavPor3 -eriEur2 eriEur2, Hedgehog, May 2012, Broad Institute EriEur2.0 -eriEur1 eriEur1, Hedgehog, Jun. 2006, Broad Institute Draft_v1 -equCab2 equCab2, Horse, Sep. 2007, Broad Institute EquCab2 -equCab1 equCab1, Horse, Jan. 2007, Broad Institute EquCab1 -dipOrd1 dipOrd1, Kangaroo rat, Jul. 2008, Baylor/Broad Institute DipOrd1.0 -triMan1 triMan1, Manatee, Oct. 2011, Broad Institute TriManLat1.0 -calJac3 calJac3, Marmoset, Mar. 2009, WUSTL Callithrix_jacchus-v3.2 -calJac1 calJac1, Marmoset, Jun. 2007, WUSTL Callithrix_jacchus-v2.0.2 -pteVam1 pteVam1, Megabat, Jul. 2008, Broad Institute Ptevap1.0 -myoLuc2 myoLuc2, Microbat, Jul. 2010, Broad Institute MyoLuc2.0 -balAcu1 balAcu1, Minke whale, Oct. 2013, KORDI BalAcu1.0 -mm10 mm10, Mouse, Dec. 2011, Genome Reference Consortium GRCm38 -mm9 mm9, Mouse, Jul. 2007, NCBI Build 37 -mm8 mm8, Mouse, Feb. 2006, NCBI Build 36 -mm7 mm7, Mouse, Aug. 2005, NCBI Build 35 -micMur2 micMur2, Mouse lemur, May 2015, Baylor/Broad Institute Mmur_2.0 -micMur1 micMur1, Mouse lemur, Jul. 2007, Broad Institute MicMur1.0 -hetGla2 hetGla2, Naked mole-rat, Jan. 2012, Broad Institute HetGla_female_1.0 -hetGla1 hetGla1, Naked mole-rat, Jul. 2011, Beijing Genomics Institute HetGla_1.0 -monDom5 monDom5, Opossum, Oct. 2006, Broad Institute release MonDom5 -monDom4 monDom4, Opossum, Jan. 2006, Broad Institute release MonDom4 -monDom1 monDom1, Opossum, Oct. 
2004, Broad Institute release MonDom1 -ponAbe2 ponAbe2, Orangutan, Jul. 2007, WUSTL Pongo_albelii-2.0.2 -ailMel1 ailMel1, Panda, Dec. 2009, BGI-Shenzhen AilMel 1.0 -susScr3 susScr3, Pig, Aug. 2011, Swine Genome Sequencing Consortium Sscrofa10.2 -susScr2 susScr2, Pig, Nov. 2009, Swine Genome Sequencing Consortium Sscrofa9.2 -ochPri3 ochPri3, Pika, May 2012, Broad Institute OchPri3.0 -ochPri2 ochPri2, Pika, Jul. 2008, Broad Institute OchPri2 -ornAna2 ornAna2, Platypus, Feb. 2007, WUSTL v5.0.1 -ornAna1 ornAna1, Platypus, Mar. 2007, WUSTL v5.0.1 -oryCun2 oryCun2, Rabbit, Apr. 2009, Broad Institute release OryCun2 -rn6 rn6, Rat, Jul. 2014, RGSC Rnor_6.0 -rn5 rn5, Rat, Mar. 2012, RGSC Rnor_5.0 -rn4 rn4, Rat, Nov. 2004, Baylor College of Medicine HGSC v3.4 -rn3 rn3, Rat, Jun. 2003, Baylor College of Medicine HGSC v3.1 -rheMac3 rheMac3, Rhesus, Oct. 2010, Beijing Genomics Institute CR_1.0 -rheMac2 rheMac2, Rhesus, Jan. 2006, Baylor College of Medicine HGSC v1.0 Mmul_051212 -proCap1 proCap1, Rock hyrax, Jul. 2008, Baylor College of Medicine HGSC Procap1.0 -oviAri3 oviAri3, Sheep, Aug. 2012, ISGC Oar_v3.1 -oviAri1 oviAri1, Sheep, Feb. 2010, ISGC Ovis aries 1.0 -sorAra2 sorAra2, Shrew, Aug. 2008, Broad Institute SorAra2.0 -sorAra1 sorAra1, Shrew, Jun. 2006, Broad Institute SorAra1.0 -choHof1 choHof1, Sloth, Jul. 2008, Broad Institute ChoHof1.0 -speTri2 speTri2, Squirrel, Nov. 2011, Broad Institute SpeTri2.0 -saiBol1 saiBol1, Squirrel monkey, Oct. 2011, Broad Institute SaiBol1.0 -tarSyr2 tarSyr2, Tarsier, Sep. 2013, WashU Tarsius_syrichta-2.0.1 -tarSyr1 tarSyr1, Tarsier, Aug. 2008, WUSTL/Broad Institute Tarsyr1.0 -sarHar1 sarHar1, Tasmanian devil, Feb. 2011, Wellcome Trust Sanger Institute Devil_refv7.0 -echTel2 echTel2, Tenrec, Nov. 2012, Broad Institute EchTel2.0 -echTel1 echTel1, Tenrec, Jul. 2005, Broad Institute echTel1 -tupBel1 tupBel1, Tree shrew, Dec. 2006, Broad Institute Tupbel1.0 -macEug2 macEug2, Wallaby, Sep. 
2009, Tammar Wallaby Genome Sequencing Consortium Meug_1.1 -cerSim1 cerSim1, White rhinoceros, May 2012, Broad Institute CerSimSim1.0 -allMis1 allMis1, American alligator, Aug. 2012, Int. Crocodilian Genomes Working Group allMis0.2 -gadMor1 gadMor1, Atlantic cod, May 2010, Genofisk GadMor_May2010 -melUnd1 melUnd1, Budgerigar, Sep. 2011, WUSTL v6.3 -galGal4 galGal4, Chicken, Nov. -2011, ICGC Gallus-gallus-4.0 -galGal3 galGal3, Chicken, May 2006, WUSTL Gallus-gallus-2.1 -galGal2 galGal2, Chicken, Feb. 2004, WUSTL Gallus-gallus-1.0 -latCha1 latCha1, Coelacanth, Aug. 2011, Broad Institute LatCha1 -calMil1 calMil1, Elephant shark, Dec. 2013, IMCB Callorhinchus_milli_6.1.3 -fr3 fr3, Fugu, Oct. 2011, JGI v5.0 -fr2 fr2, Fugu, Oct. 2004, JGI v4.0 -fr1 fr1, Fugu, Aug. 2002, JGI v3.0 -petMar2 petMar2, Lamprey, Sep. 2010, WUGSC 7.0 -petMar1 petMar1, Lamprey, Mar. 2007, WUSTL v3.0 -anoCar2 anoCar2, Lizard, May 2010, Broad Institute AnoCar2 -anoCar1 anoCar1, Lizard, Feb. 2007, Broad Institute AnoCar1 -oryLat2 oryLat2, Medaka, Oct. 2005, NIG v1.0 -geoFor1 geoFor1, Medium ground finch, Apr. 2012, BGI GeoFor_1.0 / NCBI 13302 -oreNil2 oreNil2, Nile tilapia, Jan. 2011, Broad Institute Release OreNil1.1 -chrPic1 chrPic1, Painted turtle, Dec. 2011, IPTGSC Chrysemys_picta_bellii-3.0.1 -gasAcu1 gasAcu1, Stickleback, Feb. 2006, Broad Institute Release 1.0 -tetNig2 tetNig2, Tetraodon, Mar. 2007, Genoscope v7 -tetNig1 tetNig1, Tetraodon, Feb. 2004, Genoscope v7 -melGal1 melGal1, Turkey, Dec. 2009, Turkey Genome Consortium v2.01 -xenTro7 xenTro7, X. tropicalis, Sep. 2012, JGI v.7.0 -xenTro3 xenTro3, X. tropicalis, Nov. 2009, JGI v.4.2 -xenTro2 xenTro2, X. tropicalis, Aug. 2005, JGI v.4.1 -xenTro1 xenTro1, X. tropicalis, Oct. 2004, JGI v.3.0 -taeGut2 taeGut2, Zebra finch, Feb. 2013, WashU taeGut324 -taeGut1 taeGut1, Zebra finch, Jul. 2008, WUSTL v3.2.4 -danRer10 danRer10, Zebrafish, Sep. 2014, Genome Reference Consortium GRCz10 -danRer7 danRer7, Zebrafish, Jul. 
2010, Sanger Institute Zv9 -danRer6 danRer6, Zebrafish, Dec. 2008, Sanger Institute Zv8 -danRer5 danRer5, Zebrafish, Jul. 2007, Sanger Institute Zv7 -danRer4 danRer4, Zebrafish, Mar. 2006, Sanger Institute Zv6 -danRer3 danRer3, Zebrafish, May 2005, Sanger Institute Zv5 -ci2 ci2, C. intestinalis, Mar. 2005, JGI v2.0 -ci1 ci1, C. intestinalis, Dec. 2002, JGI v1.0 -braFlo1 braFlo1, Lancelet, Mar. 2006, JGI v1.0 -strPur2 strPur2, S. purpuratus, Sep. 2006, Baylor College of Medicine HGSC v. Spur 2.1 -strPur1 strPur1, S. purpuratus, Apr. 2005, Baylor College of Medicine HGSC v. Spur_0.5 -apiMel2 apiMel2, A. mellifera, Jan. 2005, Baylor College of Medicine HGSC v.Amel_2.0 -apiMel1 apiMel1, A. mellifera, Jul. 2004, Baylor College of Medicine HGSC v.Amel_1.2 -anoGam1 anoGam1, A. gambiae, Feb. 2003, IAGP v.MOZ2 -droAna2 droAna2, D. ananassae, Aug. 2005, Agencourt Arachne release -droAna1 droAna1, D. ananassae, Jul. 2004, TIGR Celera release -droEre1 droEre1, D. erecta, Aug. 2005, Agencourt Arachne release -droGri1 droGri1, D. grimshawi, Aug. 2005, Agencourt Arachne release -dm6 dm6, D. melanogaster, Aug. 2014, BDGP Release 6 + ISO1 MT -dm3 dm3, D. melanogaster, Apr. 2006, BDGP Release 5 -dm2 dm2, D. melanogaster, Apr. 2004, BDGP Release 4 -dm1 dm1, D. melanogaster, Jan. 2003, BDGP Release 3 -droMoj2 droMoj2, D. mojavensis, Aug. 2005, Agencourt Arachne release -droMoj1 droMoj1, D. mojavensis, Aug. 2004, Agencourt Arachne release -droPer1 droPer1, D. persimilis, Oct. 2005, Broad Institute release -dp3 dp3, D. pseudoobscura, Nov. 2004, Flybase Release 1.0 -dp2 dp2, D. pseudoobscura, Aug. 2003, Baylor College of Medicine HGSC Freeze 1 -droSec1 droSec1, D. sechellia, Oct. 2005, Broad Institute Release 1.0 -droSim1 droSim1, D. simulans, Apr. 2005, WUSTL Release 1.0 -droVir2 droVir2, D. virilis, Aug. 2005, Agencourt Arachne release -droVir1 droVir1, D. virilis, Jul. 2004, Agencourt Arachne release -droYak2 droYak2, D. yakuba, Nov. 2005, WUSTL Release 2.0 -droYak1 droYak1, D. 
yakuba, Apr. 2004, WUSTL Release 1.0 -caePb2 caePb2, C. brenneri, Feb. 2008, WUSTL 6.0.1 -caePb1 caePb1, C. brenneri, Jan. 2007, WUSTL 4.0 -cb3 cb3, C. briggsae, Jan. 2007, WUSTL Cb3 -cb1 cb1, C. briggsae, Jul. 2002, WormBase v. cb25.agp8 -ce10 ce10, C. elegans, Oct. 2010, WormBase v. WS220 -ce6 ce6, C. elegans, May 2008, WormBase v. WS190 -ce4 ce4, C. elegans, Jan. 2007, WormBase v. WS170 -ce2 ce2, C. elegans, Mar. 2004, WormBase v. WS120 -caeJap1 caeJap1, C. japonica, Mar. 2008, WUSTL 3.0.2 -caeRem3 caeRem3, C. remanei, May 2007, WUSTL 15.0.1 -caeRem2 caeRem2, C. remanei, Mar. 2006, WUSTL 1.0 -priPac1 priPac1, P. pacificus, Feb. 2007, WUSTL 5.0 -aplCal1 aplCal1, Sea Hare, Sep. 2008, Broad Release Aplcal2.0 -sacCer3 sacCer3, Yeast, April 2011, SGD April 2011 sequence -sacCer2 sacCer2, Yeast, June 2008, SGD June 2008 sequence -sacCer1 sacCer1, Yeast, Oct. 2003, SGD 1 Oct 2003 sequence -eboVir3 eboVir3, Ebola Virus, June 2014, Sierra Leone 2014 (G3683/KM034562.1)
# tool-data/listing_available_identifiers_and_data_sources.r
#
# Generates the Galaxy .loc.sample tables consumed by the getgo tool:
#   org_name.loc.sample              organism label <tab> annotation package
#   available_identifiers.loc.sample identifier <tab> identifier <tab> package
#   available_categories.loc.sample  category <tab> category <tab> package
#
# Run it from the directory that contains org_packages.tab (one Bioconductor
# organism annotation package name per line). The output files are written to
# the current working directory.
library("reshape2")

# Read the package names as plain character strings; with the pre-R-4.0
# default they would be a factor, which is fragile when passed to library().
packages <- read.table("org_packages.tab", stringsAsFactors = FALSE)
org_packages <- packages$V1

# biocLite() is not part of base R: it is defined by the Bioconductor
# installer script, so load that first unless the session already has it.
# NOTE(review): on Bioconductor >= 3.8 biocLite is retired in favour of
# BiocManager::install() -- confirm which release this tool targets.
if (!exists("biocLite")) {
  source("https://bioconductor.org/biocLite.R")
}
biocLite(org_packages, dependencies = TRUE)

# Attach every annotation package. library() stops with a clear error when a
# package is missing, whereas require() would only return FALSE and let the
# script fail later with a confusing message.
for (pkg in org_packages) {
  library(pkg, character.only = TRUE)
}

# Identifier types and annotation categories that the tool offers.
identifiers <- c(
  "GENENAME", "UNIGENE", "UNIPROT", "REFSEQ",
  "SYMBOL", "ENSEMBL", "FLYBASECG", "ACCNUM"
)
categories <- c(
  "PMID", "ENZYME", "GO2ALLEGS", "PATH",
  "GO2ALLTAIRS", "GO2ALLORFS", "PFAM", "PROSITE"
)

# Every package exposes an object named <package without ".db">ORGANISM
# holding the species name, e.g. org.Hs.egORGANISM. Build the display label
# "Species name (package)" from it.
organism_names <- vapply(
  org_packages,
  function(pkg) {
    organism_var <- paste0(sub("\\.db$", "", pkg), "ORGANISM")
    organism <- get(organism_var, pos = paste0("package:", pkg))
    paste0(organism, " (", pkg, ")")
  },
  character(1),
  USE.NAMES = FALSE
)
org_name_tab <- data.frame(
  organism_names,
  org_packages,
  stringsAsFactors = FALSE
)

# Long table of every object exported by every package, suitable for Galaxy's
# <filter> tags: column `value` is the object name, `L1` the package.
exported <- lapply(paste0("package:", org_packages), ls)
names(exported) <- org_packages
filter_tab <- melt(exported)

# Strip the package prefix ("org.Hs.eg", "org.At.tair", ...) from the object
# names so that only the mapping name ("ENZYME", "SYMBOL", ...) remains.
# Dots are escaped and the pattern is anchored so that only a literal leading
# prefix is removed.
prefixes <- unique(sub("\\.db$", "", org_packages))
prefix_pattern <- paste0(
  "^(",
  paste(gsub(".", "\\.", prefixes, fixed = TRUE), collapse = "|"),
  ")"
)
filter_tab$value <- sub(prefix_pattern, "", as.character(filter_tab$value))

# Lay a subset of filter_tab out as the three .loc columns:
# name, value (identical to name) and package.
as_loc_table <- function(tab) {
  data.frame(
    name = tab$value,
    value = tab$value,
    package = tab$L1,
    stringsAsFactors = FALSE
  )
}

available_identifiers <- as_loc_table(
  filter_tab[filter_tab$value %in% identifiers, , drop = FALSE]
)
available_categories <- as_loc_table(
  filter_tab[filter_tab$value %in% categories, , drop = FALSE]
)

# Add the ENTREZ id format, which is listed first for every package, to the
# available identifiers.
entrez <- data.frame(
  name = "ENTREZ",
  value = "ENTREZ",
  package = org_packages,
  stringsAsFactors = FALSE
)
available_identifiers <- rbind(entrez, available_identifiers)

write.table(
  available_identifiers,
  file = "available_identifiers.loc.sample",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
write.table(
  available_categories,
  file = "available_categories.loc.sample",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
write.table(
  org_name_tab,
  file = "org_name.loc.sample",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/org_name.loc.sample Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,20 @@ +Anopheles gambiae (org.Ag.eg.db) org.Ag.eg.db +Arabidopsis thaliana (org.At.tair.db) org.At.tair.db +Bos taurus (org.Bt.eg.db) org.Bt.eg.db +Caenorhabditis elegans (org.Ce.eg.db) org.Ce.eg.db +Canis familiaris (org.Cf.eg.db) org.Cf.eg.db +Drosophila melanogaster (org.Dm.eg.db) org.Dm.eg.db +Danio rerio (org.Dr.eg.db) org.Dr.eg.db +Escherichia coli (org.EcK12.eg.db) org.EcK12.eg.db +Escherichia coli (org.EcSakai.eg.db) org.EcSakai.eg.db +Gallus gallus (org.Gg.eg.db) org.Gg.eg.db +Homo sapiens (org.Hs.eg.db) org.Hs.eg.db +Mus musculus (org.Mm.eg.db) org.Mm.eg.db +Macaca mulatta (org.Mmu.eg.db) org.Mmu.eg.db +Plasmodium falciparum (org.Pf.plasmo.db) org.Pf.plasmo.db +Pan troglodytes (org.Pt.eg.db) org.Pt.eg.db +Rattus norvegicus (org.Rn.eg.db) org.Rn.eg.db +Saccharomyces cerevisiae (org.Sc.sgd.db) org.Sc.sgd.db +Sus scrofa (org.Ss.eg.db) org.Ss.eg.db +Toxoplasma gondii (org.Tgondii.eg.db) org.Tgondii.eg.db +Xenopus laevis (org.Xl.eg.db) org.Xl.eg.db
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/org_packages.tab Thu Mar 31 12:23:45 2016 -0400 @@ -0,0 +1,20 @@ +org.Ag.eg.db +org.At.tair.db +org.Bt.eg.db +org.Ce.eg.db +org.Cf.eg.db +org.Dm.eg.db +org.Dr.eg.db +org.EcK12.eg.db +org.EcSakai.eg.db +org.Gg.eg.db +org.Hs.eg.db +org.Mm.eg.db +org.Mmu.eg.db +org.Pf.plasmo.db +org.Pt.eg.db +org.Rn.eg.db +org.Sc.sgd.db +org.Ss.eg.db +org.Tgondii.eg.db +org.Xl.eg.db
--- a/tool_data_table_conf.xml.sample Mon Mar 07 14:35:53 2016 -0500 +++ b/tool_data_table_conf.xml.sample Thu Mar 31 12:23:45 2016 -0400 @@ -1,11 +1,15 @@ <tables> - <!-- Location of Picard dict file and other files --> - <table name="go_genomes" comment_char="#"> - <columns>value, name</columns> - <file path="${__HERE__}/test-data/genomes.loc" /> + <!-- Available organism packages and available categories for getgo tool --> + <table name="org_names" comment_char="#"> + <columns>name, value</columns> + <file path="tool-data/org_name.loc.sample" /> </table> - <table name="go_gene_ids" comment_char="#"> - <columns>value, name</columns> - <file path="${__HERE__}/test-data/gene_ids.loc" /> + <table name="available_categories" comment_char="#"> + <columns>name, value, package</columns> + <file path="tool-data/available_categories.loc.sample" /> + </table> + <table name="available_identifiers" comment_char="#"> + <columns>name, value, package</columns> + <file path="tool-data/available_identifiers.loc.sample" /> </table> </tables>
--- a/tool_data_table_conf.xml.sample.test Mon Mar 07 14:35:53 2016 -0500 +++ b/tool_data_table_conf.xml.sample.test Thu Mar 31 12:23:45 2016 -0400 @@ -1,11 +1,15 @@ <tables> - <!-- Location of Picard dict file and other files --> - <table name="go_genomes" comment_char="#"> - <columns>value, name</columns> - <file path="tool-data/genomes.loc" /> + <!-- Available organism packages and available categories for getgo tool --> + <table name="org_names" comment_char="#"> + <columns>name, value</columns> + <file path="${__HERE__}/test-data/org_name.loc" /> </table> - <table name="go_gene_ids" comment_char="#"> - <columns>value, name</columns> - <file path="tool-data/gene_ids.loc" /> + <table name="available_categories" comment_char="#"> + <columns>name, value, package</columns> + <file path="${__HERE__}/test-data/available_categories.loc" /> + </table> + <table name="available_identifiers" comment_char="#"> + <columns>name, value, package</columns> + <file path="${__HERE__}/test-data/available_identifiers.loc" /> </table> </tables>
--- a/tool_dependencies.xml Mon Mar 07 14:35:53 2016 -0500 +++ b/tool_dependencies.xml Thu Mar 31 12:23:45 2016 -0400 @@ -4,6 +4,6 @@ <repository changeset_revision="9f31a291b305" name="package_r_3_2_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="goseq" version="1.22.0"> - <repository changeset_revision="4f43af0c6b44" name="package_r_3_2_1_goseq_1_22_0" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="cc302cd9b007" name="package_r_3_2_1_goseq_1_22_0" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>