changeset 9:04b9c519d3e1 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
author mvdbeek
date Thu, 31 Mar 2016 12:23:45 -0400
parents fb95db039592
children f7f3f7db2d4a
files getgo.r getgo.xml go_macros.xml goseq.xml test-data/available_categories.loc test-data/available_identifiers.loc test-data/gene_ids.loc test-data/genomes.loc test-data/org_name.loc test-data/tool_data_table_conf.xml test-data/wal.tab tool-data/NCBI.sqlite tool-data/available_categories.loc.sample tool-data/available_identifiers.loc.sample tool-data/gene2pubmed.gz tool-data/gene_ids.loc.sample tool-data/genomes.loc.sample tool-data/listing_available_identifiers_and_data_sources.r tool-data/org_name.loc.sample tool-data/org_packages.tab tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample.test tool_dependencies.xml
diffstat 23 files changed, 647 insertions(+), 492 deletions(-) [+]
line wrap: on
line diff
--- a/getgo.r	Mon Mar 07 14:35:53 2016 -0500
+++ b/getgo.r	Thu Mar 31 12:23:45 2016 -0400
@@ -6,15 +6,13 @@
 suppressPackageStartupMessages({
     library("goseq")
     library("optparse")
-    library("rtracklayer")
     library("reshape2")
 })
 
 sink(stdout(), type = "message")
 
 option_list <- list(
-    make_option(c("-gtf", "--gtf"), type="character", help = "Path to GTF file for which to fetch GO data"),
-    make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up GO categories]"),
+    make_option(c("-p", "--package"), type="character", help = "Genome [used for looking up GO categories]"),
     make_option(c("-i", "--gene_id"), type="character", help="Gene ID format"),
     make_option(c("-c", "--cats"), type="character", help="Comma-seperated list of categories to fetch"),
     make_option(c("-o", "--output"), type="character", help="Path to output file")
@@ -25,17 +23,48 @@
 
 # vars
 
-gtf = args$gtf
-genome = args$genome
+package = args$package
 gene_id = args$gene_id
 output = args$output
 cats = unlist(strsplit(args$cats, ','))
 
-# retrieve and transform data
-genes = unique(import.gff(gtf)$gene_id)
-go_categories = getgo(genes, genome, gene_id, fetch.cats=cats)
-go_categories = goseq:::reversemapping(go_categories)
-go_categories = melt(go_categories)
+get_categories = function(package_str, gen, cat) {
+  # Build a two-column table (gene identifier, "category") for one category `cat` taken from the annotation package named `package_str`.
+  # package_str: package name such as org.Xx.eg.db; gen: identifier type used to label (and, unless it is "ENTREZ", to re-map) the first column, e.g. ENSEMBL, UNIGENE, REFSEQ, SYMBOL or GENENAME
+  # cat: column to fetch, e.g. PMID, ENZYME or PATH; GO2ALLEGS, GO2ALLTAIRS and GO2ALLORFS are all fetched as "GOALL"
+  library(package_str, character.only = TRUE)  # attach the package whose name is held in package_str
+  package = eval( parse( text=package_str ) )  # evaluate the name to get the object called like the package (presumably the annotation db object it exports -- TODO confirm)
+  if( cat %in% c("GO2ALLEGS", "GO2ALLTAIRS", "GO2ALLORFS") ) {  # the three GO2ALL* choices are all queried through the same "GOALL" column
+    cat = "GOALL"
+  }
+  if(package_str == "org.Pf.plasmo.db") {  # choose the key type used for the query from the package name
+    keytype = "ORF"
+    } else if(package_str == "org.At.tair.db") {
+    keytype = "TAIR"
+    } else {
+    keytype = "ENTREZID"
+    }
+  entrez_cat = select(package, keys(package), cat, keytype)  # one query over keys(package); select()/keys() are presumably AnnotationDbi's -- TODO confirm
+  entrez_cat = entrez_cat[complete.cases(entrez_cat),]  # keep only rows without any NA
+    if( cat != "GOALL" ) {
+      # prefix each value with the category name, so that there are no duplicate values e.g. between ENZYME and PATH
+      entrez_cat[,2] = sapply(entrez_cat[,2], function(x) paste(cat, x, sep=":"))
+    } else {
+      entrez_cat = entrez_cat[,c(1,2)] # keep the first two columns only; we are discarding ontology (MF, CC, BP) and evidence class here
+    }
+  colnames(entrez_cat) = c(gen, "category")  # the first column is labelled `gen` already here, before any re-mapping below
+  if( gen == "ENTREZ" ) {  # no re-mapping: the first column still holds identifiers of type `keytype`
+    return( entrez_cat )
+    } else {
+      # We map the `keytype` identifiers (ENTREZID by default) to `gen`, but are potentially losing gene identifiers where multiple identifiers match a single gene id, because multiVals="first" is requested.
+      entrez_cat[,1] = mapIds(package, keys=as.character(entrez_cat[,1]), keytype=keytype, column=gen, multiVals="first")
+      entrez_cat = entrez_cat[complete.cases(entrez_cat),]  # drop rows left with NA after the mapping
+      return(entrez_cat)
+    }
+}
 
-write.table(go_categories, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
-sessionInfo()
\ No newline at end of file
+result = lapply( cats, function(x) get_categories(package, gene_id, x ) )
+result = do.call(rbind, result)
+
+write.table(result, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
+sessionInfo()
--- a/getgo.xml	Mon Mar 07 14:35:53 2016 -0500
+++ b/getgo.xml	Thu Mar 31 12:23:45 2016 -0400
@@ -1,53 +1,58 @@
-<tool id="getgo" name="getgo" version="0.1.0">
-    <description>downloads gene ontologies for model organisms</description>
+<tool id="getgo" name="get_gene_categories" version="0.1.0">
+    <description>retrieve gene categories for model organisms</description>
     <macros>
         <import>go_macros.xml</import>
     </macros>
     <expand macro="requirements" />
     <expand macro="stdio" />
-    <command interpreter="Rscript">
-        getgo.r --genome "$genome"
-        --gtf "$gtf"
+    <command><![CDATA[
+        Rscript $__tool_directory__/getgo.r
+        --package "$package"
         --gene_id "$gene_id"
         --output "$output"
         --cats "$cats"
+        ]]>
     </command>
     <inputs>
-        <param name="gtf" label="select GTF file" help="GO annotations for all gene ids in this GTF will be fetched" type="data" format="gtf"/>
-        <param help="Needed to retrieve GO annotations for the selected genome" label="Select the genome source" name="genome" size="3" type="select">
-            <options from_data_table="go_genomes"></options>
+        <param help="These are bioconductor genome annotation packages." label="Select the genome package" name="package" size="3" type="select">
+            <options from_data_table="org_names"></options>
         </param>
-        <param help="Needed for GO analysis" label="Select gene identifier format" name="gene_id" type="select">
-            <options from_data_table="go_gene_ids"></options>
+        <param help="This option determines which gene identifier format is used for mapping genes to categories in the output file. If you have a list of differentially expressed genes, choose the same format." label="Select gene identifier format" name="gene_id" type="select">
+            <options from_data_table="available_identifiers">
+                <filter type="param_value" ref="package" column="2"/>
+            </options>
         </param>
-        <param name="cats" help="Select the categories for which you would like to retrieve ontologies" type="select" multiple="true" display="checkboxes">
-            <option value="GO:CC">GO:Cellular Components</option>
-            <option value="GO:BP">BiologicalProcesses</option>
-            <option value="GO:MF">Molecular Function</option>
-            <option value="KEGG">KEGG pathway</option>
+        <param name="cats" label="Select categories" help="Selecting a category returns that category together with a list of all genes in it" type="select" multiple="true" display="checkboxes">
+            <options from_data_table="available_categories">
+                <filter type="param_value" ref="package" column="2"/>
+            </options>
         </param>
     </inputs>
     <outputs>
-        <data format="tabular" label="GO category mapping" name="output" />
+        <data format="tabular" label="gene category mapping" name="output" />
     </outputs>
     <tests>
         <test>
-            <param name="gtf" value="in.gtf" ftype="gtf"></param>
-            <param name="genome" value="hg38"></param>
-            <param name="gene_id" value="ensGene"></param>
-            <param name="cats" value="GO:CC,GO:BP,GO:MF"></param>
-            <output name="output" file="go_terms.tab"></output>
+            <param name="package" value="org.Hs.eg.db"></param>
+            <param name="gene_id" value="ENSEMBL"></param>
+            <param name="cats" value="GO2ALLEGS"></param>
+            <output name="output" file="go_terms.tab" compare="contains"></output>
         </test>
     </tests>
     <help>
 
         **What it does**
 
-        Returns a tabular file with GO gene categories for all genes present in the input GTF file.
-
+        This tool uses bioconductor species annotation packages [org.Xx.xx.db] to extract gene category information.
+        To do gene {category/set} enrichment analysis, use the output of this tool with the goseq tool.
 
         </help>
     <citations>
-        <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
+        <citation type="bibtex">@ARTICLE{AnnotationDbi,
+        title = {AnnotationDbi: Annotation Database Interface},
+        author = {Herve Pages and Marc Carlson and Seth Falcon and Nianhua Li},
+        note = {R package version 1.32.3}
+        }
+        </citation>
     </citations>
 </tool>
--- a/go_macros.xml	Mon Mar 07 14:35:53 2016 -0500
+++ b/go_macros.xml	Thu Mar 31 12:23:45 2016 -0400
@@ -3,6 +3,7 @@
         <requirements>
             <requirement type="package" version="3.2.1">R</requirement>
             <requirement type="package" version="1.22.0">goseq</requirement>
+            <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
         </requirements>
     </xml>
     <xml name="stdio">
@@ -21,4 +22,4 @@
                    description="An undefined error occured, please check your input carefully and contact your administrator." />
         </stdio>
     </xml>
-</macros>
\ No newline at end of file
+</macros>
--- a/goseq.xml	Mon Mar 07 14:35:53 2016 -0500
+++ b/goseq.xml	Thu Mar 31 12:23:45 2016 -0400
@@ -70,8 +70,7 @@
             <param name="length_file" value="gene_length.tab" ftype="tabular"/>
             <param name="category_file" value="category.tab" ftype="tabular"/>
             <param name="use_genes_without_cat" value="true" />
-            <param name="p_adj_column" value="2" />
-            <output name="wallenius_tab" file="wal.tab" compare="contains"/>/>
+            <output name="wallenius_tab" file="wal.tab" compare="re_match"/>
         </test>
     </tests>
     <help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/available_categories.loc	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,90 @@
+ENZYME	ENZYME	org.Ag.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ag.eg.db
+PATH	PATH	org.Ag.eg.db
+PMID	PMID	org.Ag.eg.db
+ENZYME	ENZYME	org.At.tair.db
+GO2ALLTAIRS	GO2ALLTAIRS	org.At.tair.db
+PATH	PATH	org.At.tair.db
+PMID	PMID	org.At.tair.db
+ENZYME	ENZYME	org.Bt.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Bt.eg.db
+PATH	PATH	org.Bt.eg.db
+PFAM	PFAM	org.Bt.eg.db
+PMID	PMID	org.Bt.eg.db
+PROSITE	PROSITE	org.Bt.eg.db
+ENZYME	ENZYME	org.Ce.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ce.eg.db
+PATH	PATH	org.Ce.eg.db
+PMID	PMID	org.Ce.eg.db
+ENZYME	ENZYME	org.Cf.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Cf.eg.db
+PATH	PATH	org.Cf.eg.db
+PMID	PMID	org.Cf.eg.db
+ENZYME	ENZYME	org.Dm.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Dm.eg.db
+PATH	PATH	org.Dm.eg.db
+PMID	PMID	org.Dm.eg.db
+ENZYME	ENZYME	org.Dr.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Dr.eg.db
+PATH	PATH	org.Dr.eg.db
+PFAM	PFAM	org.Dr.eg.db
+PMID	PMID	org.Dr.eg.db
+PROSITE	PROSITE	org.Dr.eg.db
+ENZYME	ENZYME	org.EcK12.eg.db
+GO2ALLEGS	GO2ALLEGS	org.EcK12.eg.db
+PATH	PATH	org.EcK12.eg.db
+PMID	PMID	org.EcK12.eg.db
+ENZYME	ENZYME	org.EcSakai.eg.db
+GO2ALLEGS	GO2ALLEGS	org.EcSakai.eg.db
+PATH	PATH	org.EcSakai.eg.db
+PMID	PMID	org.EcSakai.eg.db
+ENZYME	ENZYME	org.Gg.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Gg.eg.db
+PATH	PATH	org.Gg.eg.db
+PFAM	PFAM	org.Gg.eg.db
+PMID	PMID	org.Gg.eg.db
+PROSITE	PROSITE	org.Gg.eg.db
+ENZYME	ENZYME	org.Hs.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Hs.eg.db
+PATH	PATH	org.Hs.eg.db
+PFAM	PFAM	org.Hs.eg.db
+PMID	PMID	org.Hs.eg.db
+PROSITE	PROSITE	org.Hs.eg.db
+ENZYME	ENZYME	org.Mm.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Mm.eg.db
+PATH	PATH	org.Mm.eg.db
+PFAM	PFAM	org.Mm.eg.db
+PMID	PMID	org.Mm.eg.db
+PROSITE	PROSITE	org.Mm.eg.db
+ENZYME	ENZYME	org.Mmu.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Mmu.eg.db
+PATH	PATH	org.Mmu.eg.db
+PMID	PMID	org.Mmu.eg.db
+ENZYME	ENZYME	org.Pf.plasmo.db
+GO2ALLORFS	GO2ALLORFS	org.Pf.plasmo.db
+PATH	PATH	org.Pf.plasmo.db
+ENZYME	ENZYME	org.Pt.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Pt.eg.db
+PATH	PATH	org.Pt.eg.db
+PMID	PMID	org.Pt.eg.db
+ENZYME	ENZYME	org.Rn.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Rn.eg.db
+PATH	PATH	org.Rn.eg.db
+PFAM	PFAM	org.Rn.eg.db
+PMID	PMID	org.Rn.eg.db
+PROSITE	PROSITE	org.Rn.eg.db
+ENZYME	ENZYME	org.Sc.sgd.db
+GO2ALLORFS	GO2ALLORFS	org.Sc.sgd.db
+PATH	PATH	org.Sc.sgd.db
+PFAM	PFAM	org.Sc.sgd.db
+PMID	PMID	org.Sc.sgd.db
+ENZYME	ENZYME	org.Ss.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ss.eg.db
+PATH	PATH	org.Ss.eg.db
+PMID	PMID	org.Ss.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Tgondii.eg.db
+PMID	PMID	org.Tgondii.eg.db
+ENZYME	ENZYME	org.Xl.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Xl.eg.db
+PATH	PATH	org.Xl.eg.db
+PMID	PMID	org.Xl.eg.db
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/available_identifiers.loc	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,137 @@
+ENTREZ	ENTREZ	org.Ag.eg.db
+ENTREZ	ENTREZ	org.At.tair.db
+ENTREZ	ENTREZ	org.Bt.eg.db
+ENTREZ	ENTREZ	org.Ce.eg.db
+ENTREZ	ENTREZ	org.Cf.eg.db
+ENTREZ	ENTREZ	org.Dm.eg.db
+ENTREZ	ENTREZ	org.Dr.eg.db
+ENTREZ	ENTREZ	org.EcK12.eg.db
+ENTREZ	ENTREZ	org.EcSakai.eg.db
+ENTREZ	ENTREZ	org.Gg.eg.db
+ENTREZ	ENTREZ	org.Hs.eg.db
+ENTREZ	ENTREZ	org.Mm.eg.db
+ENTREZ	ENTREZ	org.Mmu.eg.db
+ENTREZ	ENTREZ	org.Pf.plasmo.db
+ENTREZ	ENTREZ	org.Pt.eg.db
+ENTREZ	ENTREZ	org.Rn.eg.db
+ENTREZ	ENTREZ	org.Sc.sgd.db
+ENTREZ	ENTREZ	org.Ss.eg.db
+ENTREZ	ENTREZ	org.Tgondii.eg.db
+ENTREZ	ENTREZ	org.Xl.eg.db
+ACCNUM	ACCNUM	org.Ag.eg.db
+ENSEMBL	ENSEMBL	org.Ag.eg.db
+GENENAME	GENENAME	org.Ag.eg.db
+REFSEQ	REFSEQ	org.Ag.eg.db
+SYMBOL	SYMBOL	org.Ag.eg.db
+UNIGENE	UNIGENE	org.Ag.eg.db
+UNIPROT	UNIPROT	org.Ag.eg.db
+GENENAME	GENENAME	org.At.tair.db
+REFSEQ	REFSEQ	org.At.tair.db
+SYMBOL	SYMBOL	org.At.tair.db
+ACCNUM	ACCNUM	org.Bt.eg.db
+ENSEMBL	ENSEMBL	org.Bt.eg.db
+GENENAME	GENENAME	org.Bt.eg.db
+REFSEQ	REFSEQ	org.Bt.eg.db
+SYMBOL	SYMBOL	org.Bt.eg.db
+UNIGENE	UNIGENE	org.Bt.eg.db
+UNIPROT	UNIPROT	org.Bt.eg.db
+ACCNUM	ACCNUM	org.Ce.eg.db
+ENSEMBL	ENSEMBL	org.Ce.eg.db
+GENENAME	GENENAME	org.Ce.eg.db
+REFSEQ	REFSEQ	org.Ce.eg.db
+SYMBOL	SYMBOL	org.Ce.eg.db
+UNIGENE	UNIGENE	org.Ce.eg.db
+UNIPROT	UNIPROT	org.Ce.eg.db
+ACCNUM	ACCNUM	org.Cf.eg.db
+ENSEMBL	ENSEMBL	org.Cf.eg.db
+GENENAME	GENENAME	org.Cf.eg.db
+REFSEQ	REFSEQ	org.Cf.eg.db
+SYMBOL	SYMBOL	org.Cf.eg.db
+UNIGENE	UNIGENE	org.Cf.eg.db
+UNIPROT	UNIPROT	org.Cf.eg.db
+ACCNUM	ACCNUM	org.Dm.eg.db
+ENSEMBL	ENSEMBL	org.Dm.eg.db
+FLYBASECG	FLYBASECG	org.Dm.eg.db
+GENENAME	GENENAME	org.Dm.eg.db
+REFSEQ	REFSEQ	org.Dm.eg.db
+SYMBOL	SYMBOL	org.Dm.eg.db
+UNIGENE	UNIGENE	org.Dm.eg.db
+UNIPROT	UNIPROT	org.Dm.eg.db
+ACCNUM	ACCNUM	org.Dr.eg.db
+ENSEMBL	ENSEMBL	org.Dr.eg.db
+GENENAME	GENENAME	org.Dr.eg.db
+REFSEQ	REFSEQ	org.Dr.eg.db
+SYMBOL	SYMBOL	org.Dr.eg.db
+UNIGENE	UNIGENE	org.Dr.eg.db
+UNIPROT	UNIPROT	org.Dr.eg.db
+ACCNUM	ACCNUM	org.EcK12.eg.db
+GENENAME	GENENAME	org.EcK12.eg.db
+REFSEQ	REFSEQ	org.EcK12.eg.db
+SYMBOL	SYMBOL	org.EcK12.eg.db
+ACCNUM	ACCNUM	org.EcSakai.eg.db
+GENENAME	GENENAME	org.EcSakai.eg.db
+REFSEQ	REFSEQ	org.EcSakai.eg.db
+SYMBOL	SYMBOL	org.EcSakai.eg.db
+ACCNUM	ACCNUM	org.Gg.eg.db
+ENSEMBL	ENSEMBL	org.Gg.eg.db
+GENENAME	GENENAME	org.Gg.eg.db
+REFSEQ	REFSEQ	org.Gg.eg.db
+SYMBOL	SYMBOL	org.Gg.eg.db
+UNIGENE	UNIGENE	org.Gg.eg.db
+UNIPROT	UNIPROT	org.Gg.eg.db
+ACCNUM	ACCNUM	org.Hs.eg.db
+ENSEMBL	ENSEMBL	org.Hs.eg.db
+GENENAME	GENENAME	org.Hs.eg.db
+REFSEQ	REFSEQ	org.Hs.eg.db
+SYMBOL	SYMBOL	org.Hs.eg.db
+UNIGENE	UNIGENE	org.Hs.eg.db
+UNIPROT	UNIPROT	org.Hs.eg.db
+ACCNUM	ACCNUM	org.Mm.eg.db
+ENSEMBL	ENSEMBL	org.Mm.eg.db
+GENENAME	GENENAME	org.Mm.eg.db
+REFSEQ	REFSEQ	org.Mm.eg.db
+SYMBOL	SYMBOL	org.Mm.eg.db
+UNIGENE	UNIGENE	org.Mm.eg.db
+UNIPROT	UNIPROT	org.Mm.eg.db
+ACCNUM	ACCNUM	org.Mmu.eg.db
+ENSEMBL	ENSEMBL	org.Mmu.eg.db
+GENENAME	GENENAME	org.Mmu.eg.db
+REFSEQ	REFSEQ	org.Mmu.eg.db
+SYMBOL	SYMBOL	org.Mmu.eg.db
+UNIPROT	UNIPROT	org.Mmu.eg.db
+GENENAME	GENENAME	org.Pf.plasmo.db
+SYMBOL	SYMBOL	org.Pf.plasmo.db
+ACCNUM	ACCNUM	org.Pt.eg.db
+ENSEMBL	ENSEMBL	org.Pt.eg.db
+GENENAME	GENENAME	org.Pt.eg.db
+REFSEQ	REFSEQ	org.Pt.eg.db
+SYMBOL	SYMBOL	org.Pt.eg.db
+UNIPROT	UNIPROT	org.Pt.eg.db
+ACCNUM	ACCNUM	org.Rn.eg.db
+ENSEMBL	ENSEMBL	org.Rn.eg.db
+GENENAME	GENENAME	org.Rn.eg.db
+REFSEQ	REFSEQ	org.Rn.eg.db
+SYMBOL	SYMBOL	org.Rn.eg.db
+UNIGENE	UNIGENE	org.Rn.eg.db
+UNIPROT	UNIPROT	org.Rn.eg.db
+ENSEMBL	ENSEMBL	org.Sc.sgd.db
+GENENAME	GENENAME	org.Sc.sgd.db
+REFSEQ	REFSEQ	org.Sc.sgd.db
+UNIPROT	UNIPROT	org.Sc.sgd.db
+ACCNUM	ACCNUM	org.Ss.eg.db
+GENENAME	GENENAME	org.Ss.eg.db
+REFSEQ	REFSEQ	org.Ss.eg.db
+SYMBOL	SYMBOL	org.Ss.eg.db
+UNIGENE	UNIGENE	org.Ss.eg.db
+UNIPROT	UNIPROT	org.Ss.eg.db
+ACCNUM	ACCNUM	org.Tgondii.eg.db
+GENENAME	GENENAME	org.Tgondii.eg.db
+REFSEQ	REFSEQ	org.Tgondii.eg.db
+SYMBOL	SYMBOL	org.Tgondii.eg.db
+UNIGENE	UNIGENE	org.Tgondii.eg.db
+ACCNUM	ACCNUM	org.Xl.eg.db
+GENENAME	GENENAME	org.Xl.eg.db
+REFSEQ	REFSEQ	org.Xl.eg.db
+SYMBOL	SYMBOL	org.Xl.eg.db
+UNIGENE	UNIGENE	org.Xl.eg.db
+UNIPROT	UNIPROT	org.Xl.eg.db
--- a/test-data/gene_ids.loc	Mon Mar 07 14:35:53 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-knownGene	knownGene, UCSC Genes, NA, Entrez Gene ID
-knownGeneOld3	knownGeneOld3, Old UCSC Genes, NA, 
-ccdsGene	ccdsGene, CCDS, NA, 
-refGene	refGene, RefSeq Genes, NA, Entrez Gene ID
-xenoRefGene	xenoRefGene, Other RefSeq, NA, 
-vegaGene	vegaGene, Vega Genes, Vega Protein Genes, HAVANA Pseudogene ID
-vegaPseudoGene	vegaPseudoGene, Vega Genes, Vega Pseudogenes, HAVANA Pseudogene ID
-ensGene	ensGene, Ensembl Genes, NA, Ensembl gene ID
-acembly	acembly, AceView Genes, NA, 
-sibGene	sibGene, SIB Genes, NA, 
-nscanPasaGene	nscanPasaGene, N-SCAN, N-SCAN PASA-EST, 
-nscanGene	nscanGene, N-SCAN, N-SCAN, 
-sgpGene	sgpGene, SGP Genes, NA, 
-geneid	geneid, Geneid Genes, NA, 
-genscan	genscan, Genscan Genes, NA, 
-exoniphy	exoniphy, Exoniphy, NA, 
-augustusHints	augustusHints, Augustus, Augustus Hints, 
-augustusXRA	augustusXRA, Augustus, Augustus De Novo, 
-augustusAbinitio	augustusAbinitio, Augustus, Augustus Ab Initio, 
-acescan	acescan, ACEScan, NA, 
-lincRNAsTranscripts	lincRNAsTranscripts, lincRNAsTranscripts, NA, Name of gene
-wgEncodeGencodeManualV3	wgEncodeGencodeManualV3, Gencode Genes, Gencode Manual, Ensembl gene ID
-wgEncodeGencodeAutoV3	wgEncodeGencodeAutoV3, Gencode Genes, Gencode Auto, Ensembl gene ID
-wgEncodeGencodePolyaV3	wgEncodeGencodePolyaV3, Gencode Genes, Gencode PolyA, Ensembl gene ID
-wgEncodeGencodeBasicV17	wgEncodeGencodeBasicV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodeCompV17	wgEncodeGencodeCompV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV17	wgEncodeGencodePseudoGeneV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV17	wgEncodeGencode2wayConsPseudoV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodePolyaV17	wgEncodeGencodePolyaV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodeBasicV14	wgEncodeGencodeBasicV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodeCompV14	wgEncodeGencodeCompV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV14	wgEncodeGencodePseudoGeneV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV14	wgEncodeGencode2wayConsPseudoV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodePolyaV14	wgEncodeGencodePolyaV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodeBasicV7	wgEncodeGencodeBasicV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodeCompV7	wgEncodeGencodeCompV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV7	wgEncodeGencodePseudoGeneV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV7	wgEncodeGencode2wayConsPseudoV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodePolyaV7	wgEncodeGencodePolyaV7, GENCODE Genes V7, NA, Ensembl gene ID
-flyBaseGene	flyBaseGene, FlyBase Genes, NA, Name of canonical transcript in cluster
-sgdGene	sgdGene, SGD Genes, NA, Name of canonical transcript in cluster
-geneSymbol	geneSymbol, refGene, refFlat, Gene Symbol
--- a/test-data/genomes.loc	Mon Mar 07 14:35:53 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
-hg38	hg38, Human, Dec. 2013, Genome Reference Consortium GRCh38
-hg19	hg19, Human, Feb. 2009, Genome Reference Consortium GRCh37
-hg18	hg18, Human, Mar. 2006, NCBI Build 36.1
-hg17	hg17, Human, May 2004, NCBI Build 35
-hg16	hg16, Human, Jul. 2003, NCBI Build 34
-vicPac2	vicPac2, Alpaca, Mar. 2013, Broad Institute Vicugna_pacos-2.0.1
-vicPac1	vicPac1, Alpaca, Jul. 2008, Broad Institute VicPac1.0
-dasNov3	dasNov3, Armadillo, Dec. 2011, Broad Institute DasNov3
-otoGar3	otoGar3, Bushbaby, Mar. 2011, Broad Institute OtoGar3
-papHam1	papHam1, Baboon, Nov. 2008, Baylor College of Medicine HGSC Pham_1.0
-papAnu2	papAnu2, Baboon, Mar. 2012, Baylor College of Medicine Panu_2.0
-felCat8	felCat8, Cat, Nov. 2014, ICGSC Felis_catus_8.0
-felCat5	felCat5, Cat, Sep. 2011, ICGSC Felis_catus-6.2
-felCat4	felCat4, Cat, Dec. 2008, NHGRI catChrV17e
-felCat3	felCat3, Cat, Mar. 2006, Broad Institute Release 3
-panTro4	panTro4, Chimp, Feb. 2011, CGSC Build 2.1.4
-panTro3	panTro3, Chimp, Oct. 2010, CGSC Build 2.1.3
-panTro2	panTro2, Chimp, Mar. 2006, CGSC Build 2.1
-panTro1	panTro1, Chimp, Nov. 2003, CGSC Build 1.1
-criGri1	criGri1, Chinese hamster, Jul. 2013, Beijing Genomics Institution-Shenzhen C_griseus_v1.0
-bosTau8	bosTau8, Cow, Jun. 2014, University of Maryland v3.1.1
-bosTau7	bosTau7, Cow, Oct. 2011, Baylor College of Medicine HGSC Btau_4.6.1
-bosTau6	bosTau6, Cow, Nov. 2009, University of Maryland v3.1
-bosTau4	bosTau4, Cow, Oct. 2007, Baylor College of Medicine HGSC Btau_4.0
-bosTau3	bosTau3, Cow, Aug. 2006, Baylor College of Medicine HGSC Btau_3.1
-bosTau2	bosTau2, Cow, Mar. 2005, Baylor College of Medicine HGSC Btau_2.0
-canFam3	canFam3, Dog, Sep. 2011, Broad Institute v3.1
-canFam2	canFam2, Dog, May 2005, Broad Institute v2.0
-canFam1	canFam1, Dog, Jul. 2004, Broad Institute v1.0
-turTru2	turTru2, Dolphin, Oct. 2011, Baylor College of Medicine Ttru_1.4
-loxAfr3	loxAfr3, Elephant, Jul. 2009, Broad Institute LoxAfr3
-musFur1	musFur1, Ferret, Apr. 2011, Ferret Genome Sequencing Consortium MusPutFur1.0
-nomLeu3	nomLeu3, Gibbon, Oct. 2012, Gibbon Genome Sequencing Consortium Nleu3.0
-nomLeu2	nomLeu2, Gibbon, Jun. 2011, Gibbon Genome Sequencing Consortium Nleu1.1
-nomLeu1	nomLeu1, Gibbon, Jan. 2010, Gibbon Genome Sequencing Consortium Nleu1.0
-gorGor3	gorGor3, Gorilla, May 2011, Wellcome Trust Sanger Institute gorGor3.1
-cavPor3	cavPor3, Guinea pig, Feb. 2008, Broad Institute cavPor3
-eriEur2	eriEur2, Hedgehog, May 2012, Broad Institute EriEur2.0
-eriEur1	eriEur1, Hedgehog, Jun. 2006, Broad Institute Draft_v1
-equCab2	equCab2, Horse, Sep. 2007, Broad Institute EquCab2
-equCab1	equCab1, Horse, Jan. 2007, Broad Institute EquCab1
-dipOrd1	dipOrd1, Kangaroo rat, Jul. 2008, Baylor/Broad Institute DipOrd1.0
-triMan1	triMan1, Manatee, Oct. 2011, Broad Institute TriManLat1.0
-calJac3	calJac3, Marmoset, Mar. 2009, WUSTL Callithrix_jacchus-v3.2
-calJac1	calJac1, Marmoset, Jun. 2007, WUSTL Callithrix_jacchus-v2.0.2
-pteVam1	pteVam1, Megabat, Jul. 2008, Broad Institute Ptevap1.0
-myoLuc2	myoLuc2, Microbat, Jul. 2010, Broad Institute MyoLuc2.0
-balAcu1	balAcu1, Minke whale, Oct. 2013, KORDI BalAcu1.0
-mm10	mm10, Mouse, Dec. 2011, Genome Reference Consortium GRCm38
-mm9	mm9, Mouse, Jul. 2007, NCBI Build 37
-mm8	mm8, Mouse, Feb. 2006, NCBI Build 36
-mm7	mm7, Mouse, Aug. 2005, NCBI Build 35
-micMur2	micMur2, Mouse lemur, May 2015, Baylor/Broad Institute Mmur_2.0
-micMur1	micMur1, Mouse lemur, Jul. 2007, Broad Institute MicMur1.0
-hetGla2	hetGla2, Naked mole-rat, Jan. 2012, Broad Institute HetGla_female_1.0
-hetGla1	hetGla1, Naked mole-rat, Jul. 2011, Beijing Genomics Institute HetGla_1.0
-monDom5	monDom5, Opossum, Oct. 2006, Broad Institute release MonDom5
-monDom4	monDom4, Opossum, Jan. 2006, Broad Institute release MonDom4
-monDom1	monDom1, Opossum, Oct. 2004, Broad Institute release MonDom1
-ponAbe2	ponAbe2, Orangutan, Jul. 2007, WUSTL Pongo_albelii-2.0.2
-ailMel1	ailMel1, Panda, Dec. 2009, BGI-Shenzhen AilMel 1.0
-susScr3	susScr3, Pig, Aug. 2011, Swine Genome Sequencing Consortium Sscrofa10.2
-susScr2	susScr2, Pig, Nov. 2009, Swine Genome Sequencing Consortium Sscrofa9.2
-ochPri3	ochPri3, Pika, May 2012, Broad Institute OchPri3.0
-ochPri2	ochPri2, Pika, Jul. 2008, Broad Institute OchPri2
-ornAna2	ornAna2, Platypus, Feb. 2007, WUSTL v5.0.1
-ornAna1	ornAna1, Platypus, Mar. 2007, WUSTL v5.0.1
-oryCun2	oryCun2, Rabbit, Apr. 2009, Broad Institute release OryCun2
-rn6	rn6, Rat, Jul. 2014, RGSC Rnor_6.0
-rn5	rn5, Rat, Mar. 2012, RGSC Rnor_5.0
-rn4	rn4, Rat, Nov. 2004, Baylor College of Medicine HGSC v3.4
-rn3	rn3, Rat, Jun. 2003, Baylor College of Medicine HGSC v3.1
-rheMac3	rheMac3, Rhesus, Oct. 2010, Beijing Genomics Institute CR_1.0
-rheMac2	rheMac2, Rhesus, Jan. 2006, Baylor College of Medicine HGSC v1.0 Mmul_051212
-proCap1	proCap1, Rock hyrax, Jul. 2008, Baylor College of Medicine HGSC Procap1.0
-oviAri3	oviAri3, Sheep, Aug. 2012, ISGC Oar_v3.1
-oviAri1	oviAri1, Sheep, Feb. 2010, ISGC Ovis aries 1.0
-sorAra2	sorAra2, Shrew, Aug. 2008, Broad Institute SorAra2.0
-sorAra1	sorAra1, Shrew, Jun. 2006, Broad Institute SorAra1.0
-choHof1	choHof1, Sloth, Jul. 2008, Broad Institute ChoHof1.0
-speTri2	speTri2, Squirrel, Nov. 2011, Broad Institute SpeTri2.0
-saiBol1	saiBol1, Squirrel monkey, Oct. 2011, Broad Institute SaiBol1.0
-tarSyr2	tarSyr2, Tarsier, Sep. 2013, WashU Tarsius_syrichta-2.0.1
-tarSyr1	tarSyr1, Tarsier, Aug. 2008, WUSTL/Broad Institute Tarsyr1.0
-sarHar1	sarHar1, Tasmanian devil, Feb. 2011, Wellcome Trust Sanger Institute Devil_refv7.0
-echTel2	echTel2, Tenrec, Nov. 2012, Broad Institute EchTel2.0
-echTel1	echTel1, Tenrec, Jul. 2005, Broad Institute echTel1
-tupBel1	tupBel1, Tree shrew, Dec. 2006, Broad Institute Tupbel1.0
-macEug2	macEug2, Wallaby, Sep. 2009, Tammar Wallaby Genome Sequencing Consortium Meug_1.1
-cerSim1	cerSim1, White rhinoceros, May 2012, Broad Institute CerSimSim1.0
-allMis1	allMis1, American alligator, Aug. 2012, Int. Crocodilian Genomes Working Group allMis0.2
-gadMor1	gadMor1, Atlantic cod, May 2010, Genofisk GadMor_May2010
-melUnd1	melUnd1, Budgerigar, Sep. 2011, WUSTL v6.3
-galGal4	galGal4, Chicken, Nov.
-2011, ICGC Gallus-gallus-4.0
-galGal3	galGal3, Chicken, May 2006, WUSTL Gallus-gallus-2.1
-galGal2	galGal2, Chicken, Feb. 2004, WUSTL Gallus-gallus-1.0
-latCha1	latCha1, Coelacanth, Aug. 2011, Broad Institute LatCha1
-calMil1	calMil1, Elephant shark, Dec. 2013, IMCB Callorhinchus_milli_6.1.3
-fr3	fr3, Fugu, Oct. 2011, JGI v5.0
-fr2	fr2, Fugu, Oct. 2004, JGI v4.0
-fr1	fr1, Fugu, Aug. 2002, JGI v3.0
-petMar2	petMar2, Lamprey, Sep. 2010, WUGSC 7.0
-petMar1	petMar1, Lamprey, Mar. 2007, WUSTL v3.0
-anoCar2	anoCar2, Lizard, May 2010, Broad Institute AnoCar2
-anoCar1	anoCar1, Lizard, Feb. 2007, Broad Institute AnoCar1
-oryLat2	oryLat2, Medaka, Oct. 2005, NIG v1.0
-geoFor1	geoFor1, Medium ground finch, Apr. 2012, BGI GeoFor_1.0 / NCBI 13302
-oreNil2	oreNil2, Nile tilapia, Jan. 2011, Broad Institute Release OreNil1.1
-chrPic1	chrPic1, Painted turtle, Dec. 2011, IPTGSC Chrysemys_picta_bellii-3.0.1
-gasAcu1	gasAcu1, Stickleback, Feb. 2006, Broad Institute Release 1.0
-tetNig2	tetNig2, Tetraodon, Mar. 2007, Genoscope v7
-tetNig1	tetNig1, Tetraodon, Feb. 2004, Genoscope v7
-melGal1	melGal1, Turkey, Dec. 2009, Turkey Genome Consortium v2.01
-xenTro7	xenTro7, X. tropicalis, Sep. 2012, JGI v.7.0
-xenTro3	xenTro3, X. tropicalis, Nov. 2009, JGI v.4.2
-xenTro2	xenTro2, X. tropicalis, Aug. 2005, JGI v.4.1
-xenTro1	xenTro1, X. tropicalis, Oct. 2004, JGI v.3.0
-taeGut2	taeGut2, Zebra finch, Feb. 2013, WashU taeGut324
-taeGut1	taeGut1, Zebra finch, Jul. 2008, WUSTL v3.2.4
-danRer10	danRer10, Zebrafish, Sep. 2014, Genome Reference Consortium GRCz10 
-danRer7	danRer7, Zebrafish, Jul. 2010, Sanger Institute Zv9 
-danRer6	danRer6, Zebrafish, Dec. 2008, Sanger Institute Zv8 
-danRer5	danRer5, Zebrafish, Jul. 2007, Sanger Institute Zv7 
-danRer4	danRer4, Zebrafish, Mar. 2006, Sanger Institute Zv6 
-danRer3	danRer3, Zebrafish, May 2005, Sanger Institute Zv5 
-ci2	ci2, C. intestinalis, Mar. 2005, JGI v2.0
-ci1	ci1, C. intestinalis, Dec. 2002, JGI v1.0
-braFlo1	braFlo1, Lancelet, Mar. 2006, JGI v1.0
-strPur2	strPur2, S. purpuratus, Sep. 2006, Baylor College of Medicine HGSC v. Spur 2.1
-strPur1	strPur1, S. purpuratus, Apr. 2005, Baylor College of Medicine HGSC v. Spur_0.5
-apiMel2	apiMel2, A. mellifera, Jan. 2005, Baylor College of Medicine HGSC v.Amel_2.0 
-apiMel1	apiMel1, A. mellifera, Jul. 2004, Baylor College of Medicine HGSC v.Amel_1.2 
-anoGam1	anoGam1, A. gambiae, Feb. 2003, IAGP v.MOZ2
-droAna2	droAna2, D. ananassae, Aug. 2005, Agencourt Arachne release
-droAna1	droAna1, D. ananassae, Jul. 2004, TIGR Celera release
-droEre1	droEre1, D. erecta, Aug. 2005, Agencourt Arachne release
-droGri1	droGri1, D. grimshawi, Aug. 2005, Agencourt Arachne release
-dm6	dm6, D. melanogaster, Aug. 2014, BDGP Release 6 + ISO1 MT
-dm3	dm3, D. melanogaster, Apr. 2006, BDGP Release 5
-dm2	dm2, D. melanogaster, Apr. 2004, BDGP Release 4
-dm1	dm1, D. melanogaster, Jan. 2003, BDGP Release 3
-droMoj2	droMoj2, D. mojavensis, Aug. 2005, Agencourt Arachne release
-droMoj1	droMoj1, D. mojavensis, Aug. 2004, Agencourt Arachne release
-droPer1	droPer1, D. persimilis, Oct. 2005, Broad Institute release
-dp3	dp3, D. pseudoobscura, Nov. 2004, Flybase Release 1.0
-dp2	dp2, D. pseudoobscura, Aug. 2003, Baylor College of Medicine HGSC Freeze 1
-droSec1	droSec1, D. sechellia, Oct. 2005, Broad Institute Release 1.0
-droSim1	droSim1, D. simulans, Apr. 2005, WUSTL Release 1.0
-droVir2	droVir2, D. virilis, Aug. 2005, Agencourt Arachne release
-droVir1	droVir1, D. virilis, Jul. 2004, Agencourt Arachne release
-droYak2	droYak2, D. yakuba, Nov. 2005, WUSTL Release 2.0
-droYak1	droYak1, D. yakuba, Apr. 2004, WUSTL Release 1.0
-caePb2	caePb2, C. brenneri, Feb. 2008, WUSTL 6.0.1
-caePb1	caePb1, C. brenneri, Jan. 2007, WUSTL 4.0
-cb3	cb3, C. briggsae, Jan. 2007, WUSTL Cb3
-cb1	cb1, C. briggsae, Jul. 2002, WormBase v. cb25.agp8
-ce10	ce10, C. elegans, Oct. 2010, WormBase v. WS220
-ce6	ce6, C. elegans, May 2008, WormBase v. WS190
-ce4	ce4, C. elegans, Jan. 2007, WormBase v. WS170
-ce2	ce2, C. elegans, Mar. 2004, WormBase v. WS120
-caeJap1	caeJap1, C. japonica, Mar. 2008, WUSTL 3.0.2
-caeRem3	caeRem3, C. remanei, May 2007, WUSTL 15.0.1
-caeRem2	caeRem2, C. remanei, Mar. 2006, WUSTL 1.0
-priPac1	priPac1, P. pacificus, Feb. 2007, WUSTL 5.0
-aplCal1	aplCal1, Sea Hare, Sep. 2008, Broad Release Aplcal2.0
-sacCer3	sacCer3, Yeast, April 2011, SGD April 2011 sequence
-sacCer2	sacCer2, Yeast, June 2008, SGD June 2008 sequence
-sacCer1	sacCer1, Yeast, Oct. 2003, SGD 1 Oct 2003 sequence
-eboVir3	eboVir3, Ebola Virus, June 2014, Sierra Leone 2014 (G3683/KM034562.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/org_name.loc	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,20 @@
+Anopheles gambiae (org.Ag.eg.db)	org.Ag.eg.db
+Arabidopsis thaliana (org.At.tair.db)	org.At.tair.db
+Bos taurus (org.Bt.eg.db)	org.Bt.eg.db
+Caenorhabditis elegans (org.Ce.eg.db)	org.Ce.eg.db
+Canis familiaris (org.Cf.eg.db)	org.Cf.eg.db
+Drosophila melanogaster (org.Dm.eg.db)	org.Dm.eg.db
+Danio rerio (org.Dr.eg.db)	org.Dr.eg.db
+Escherichia coli (org.EcK12.eg.db)	org.EcK12.eg.db
+Escherichia coli (org.EcSakai.eg.db)	org.EcSakai.eg.db
+Gallus gallus (org.Gg.eg.db)	org.Gg.eg.db
+Homo sapiens (org.Hs.eg.db)	org.Hs.eg.db
+Mus musculus (org.Mm.eg.db)	org.Mm.eg.db
+Macaca mulatta (org.Mmu.eg.db)	org.Mmu.eg.db
+Plasmodium falciparum (org.Pf.plasmo.db)	org.Pf.plasmo.db
+Pan troglodytes (org.Pt.eg.db)	org.Pt.eg.db
+Rattus norvegicus (org.Rn.eg.db)	org.Rn.eg.db
+Saccharomyces cerevisiae (org.Sc.sgd.db)	org.Sc.sgd.db
+Sus scrofa (org.Ss.eg.db)	org.Ss.eg.db
+Toxoplasma gondii (org.Tgondii.eg.db)	org.Tgondii.eg.db
+Xenopus laevis (org.Xl.eg.db)	org.Xl.eg.db
--- a/test-data/tool_data_table_conf.xml	Mon Mar 07 14:35:53 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<tables>
-    <!-- Location of Picard dict file and other files -->
-    <table name="go_genomes" comment_char="#">
-        <columns>value, name</columns>
-        <file path="${__HERE__}/test-data/genomes.loc" />
-    </table>
-    <table name="go_gene_ids" comment_char="#">
-        <columns>value, name</columns>
-        <file path="${__HERE__}/test-data/gene_ids.loc" />
-    </table>
-</tables>
\ No newline at end of file
--- a/test-data/wal.tab	Mon Mar 07 14:35:53 2016 -0500
+++ b/test-data/wal.tab	Thu Mar 31 12:23:45 2016 -0400
@@ -1,3 +1,3 @@
-category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p.adjust.over_represented	p.adjust.under_represented
-GO:0000278	0.0122606865510724	0.999300084010281	4	5	mitotic cell cycle	BP	0.0245213731021448	0.999300084010281
-GO:0000003	1	0.796172371987733	0	1	reproduction	BP	1	0.999300084010281
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology	p\.adjust.over_represented	p\.adjust.under_represented
+GO:0000278	0\.0122.+	0\.999.+	4	5	mitotic cell cycle	BP	0\.0245.+	0\.999.+
+GO:0000003	1	0\.796.+	0	1	reproduction	BP	1	0\.999.+
Binary file tool-data/NCBI.sqlite has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/available_categories.loc.sample	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,90 @@
+ENZYME	ENZYME	org.Ag.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ag.eg.db
+PATH	PATH	org.Ag.eg.db
+PMID	PMID	org.Ag.eg.db
+ENZYME	ENZYME	org.At.tair.db
+GO2ALLTAIRS	GO2ALLTAIRS	org.At.tair.db
+PATH	PATH	org.At.tair.db
+PMID	PMID	org.At.tair.db
+ENZYME	ENZYME	org.Bt.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Bt.eg.db
+PATH	PATH	org.Bt.eg.db
+PFAM	PFAM	org.Bt.eg.db
+PMID	PMID	org.Bt.eg.db
+PROSITE	PROSITE	org.Bt.eg.db
+ENZYME	ENZYME	org.Ce.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ce.eg.db
+PATH	PATH	org.Ce.eg.db
+PMID	PMID	org.Ce.eg.db
+ENZYME	ENZYME	org.Cf.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Cf.eg.db
+PATH	PATH	org.Cf.eg.db
+PMID	PMID	org.Cf.eg.db
+ENZYME	ENZYME	org.Dm.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Dm.eg.db
+PATH	PATH	org.Dm.eg.db
+PMID	PMID	org.Dm.eg.db
+ENZYME	ENZYME	org.Dr.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Dr.eg.db
+PATH	PATH	org.Dr.eg.db
+PFAM	PFAM	org.Dr.eg.db
+PMID	PMID	org.Dr.eg.db
+PROSITE	PROSITE	org.Dr.eg.db
+ENZYME	ENZYME	org.EcK12.eg.db
+GO2ALLEGS	GO2ALLEGS	org.EcK12.eg.db
+PATH	PATH	org.EcK12.eg.db
+PMID	PMID	org.EcK12.eg.db
+ENZYME	ENZYME	org.EcSakai.eg.db
+GO2ALLEGS	GO2ALLEGS	org.EcSakai.eg.db
+PATH	PATH	org.EcSakai.eg.db
+PMID	PMID	org.EcSakai.eg.db
+ENZYME	ENZYME	org.Gg.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Gg.eg.db
+PATH	PATH	org.Gg.eg.db
+PFAM	PFAM	org.Gg.eg.db
+PMID	PMID	org.Gg.eg.db
+PROSITE	PROSITE	org.Gg.eg.db
+ENZYME	ENZYME	org.Hs.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Hs.eg.db
+PATH	PATH	org.Hs.eg.db
+PFAM	PFAM	org.Hs.eg.db
+PMID	PMID	org.Hs.eg.db
+PROSITE	PROSITE	org.Hs.eg.db
+ENZYME	ENZYME	org.Mm.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Mm.eg.db
+PATH	PATH	org.Mm.eg.db
+PFAM	PFAM	org.Mm.eg.db
+PMID	PMID	org.Mm.eg.db
+PROSITE	PROSITE	org.Mm.eg.db
+ENZYME	ENZYME	org.Mmu.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Mmu.eg.db
+PATH	PATH	org.Mmu.eg.db
+PMID	PMID	org.Mmu.eg.db
+ENZYME	ENZYME	org.Pf.plasmo.db
+GO2ALLORFS	GO2ALLORFS	org.Pf.plasmo.db
+PATH	PATH	org.Pf.plasmo.db
+ENZYME	ENZYME	org.Pt.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Pt.eg.db
+PATH	PATH	org.Pt.eg.db
+PMID	PMID	org.Pt.eg.db
+ENZYME	ENZYME	org.Rn.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Rn.eg.db
+PATH	PATH	org.Rn.eg.db
+PFAM	PFAM	org.Rn.eg.db
+PMID	PMID	org.Rn.eg.db
+PROSITE	PROSITE	org.Rn.eg.db
+ENZYME	ENZYME	org.Sc.sgd.db
+GO2ALLORFS	GO2ALLORFS	org.Sc.sgd.db
+PATH	PATH	org.Sc.sgd.db
+PFAM	PFAM	org.Sc.sgd.db
+PMID	PMID	org.Sc.sgd.db
+ENZYME	ENZYME	org.Ss.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Ss.eg.db
+PATH	PATH	org.Ss.eg.db
+PMID	PMID	org.Ss.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Tgondii.eg.db
+PMID	PMID	org.Tgondii.eg.db
+ENZYME	ENZYME	org.Xl.eg.db
+GO2ALLEGS	GO2ALLEGS	org.Xl.eg.db
+PATH	PATH	org.Xl.eg.db
+PMID	PMID	org.Xl.eg.db
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/available_identifiers.loc.sample	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,137 @@
+ENTREZ	ENTREZ	org.Ag.eg.db
+ENTREZ	ENTREZ	org.At.tair.db
+ENTREZ	ENTREZ	org.Bt.eg.db
+ENTREZ	ENTREZ	org.Ce.eg.db
+ENTREZ	ENTREZ	org.Cf.eg.db
+ENTREZ	ENTREZ	org.Dm.eg.db
+ENTREZ	ENTREZ	org.Dr.eg.db
+ENTREZ	ENTREZ	org.EcK12.eg.db
+ENTREZ	ENTREZ	org.EcSakai.eg.db
+ENTREZ	ENTREZ	org.Gg.eg.db
+ENTREZ	ENTREZ	org.Hs.eg.db
+ENTREZ	ENTREZ	org.Mm.eg.db
+ENTREZ	ENTREZ	org.Mmu.eg.db
+ENTREZ	ENTREZ	org.Pf.plasmo.db
+ENTREZ	ENTREZ	org.Pt.eg.db
+ENTREZ	ENTREZ	org.Rn.eg.db
+ENTREZ	ENTREZ	org.Sc.sgd.db
+ENTREZ	ENTREZ	org.Ss.eg.db
+ENTREZ	ENTREZ	org.Tgondii.eg.db
+ENTREZ	ENTREZ	org.Xl.eg.db
+ACCNUM	ACCNUM	org.Ag.eg.db
+ENSEMBL	ENSEMBL	org.Ag.eg.db
+GENENAME	GENENAME	org.Ag.eg.db
+REFSEQ	REFSEQ	org.Ag.eg.db
+SYMBOL	SYMBOL	org.Ag.eg.db
+UNIGENE	UNIGENE	org.Ag.eg.db
+UNIPROT	UNIPROT	org.Ag.eg.db
+GENENAME	GENENAME	org.At.tair.db
+REFSEQ	REFSEQ	org.At.tair.db
+SYMBOL	SYMBOL	org.At.tair.db
+ACCNUM	ACCNUM	org.Bt.eg.db
+ENSEMBL	ENSEMBL	org.Bt.eg.db
+GENENAME	GENENAME	org.Bt.eg.db
+REFSEQ	REFSEQ	org.Bt.eg.db
+SYMBOL	SYMBOL	org.Bt.eg.db
+UNIGENE	UNIGENE	org.Bt.eg.db
+UNIPROT	UNIPROT	org.Bt.eg.db
+ACCNUM	ACCNUM	org.Ce.eg.db
+ENSEMBL	ENSEMBL	org.Ce.eg.db
+GENENAME	GENENAME	org.Ce.eg.db
+REFSEQ	REFSEQ	org.Ce.eg.db
+SYMBOL	SYMBOL	org.Ce.eg.db
+UNIGENE	UNIGENE	org.Ce.eg.db
+UNIPROT	UNIPROT	org.Ce.eg.db
+ACCNUM	ACCNUM	org.Cf.eg.db
+ENSEMBL	ENSEMBL	org.Cf.eg.db
+GENENAME	GENENAME	org.Cf.eg.db
+REFSEQ	REFSEQ	org.Cf.eg.db
+SYMBOL	SYMBOL	org.Cf.eg.db
+UNIGENE	UNIGENE	org.Cf.eg.db
+UNIPROT	UNIPROT	org.Cf.eg.db
+ACCNUM	ACCNUM	org.Dm.eg.db
+ENSEMBL	ENSEMBL	org.Dm.eg.db
+FLYBASECG	FLYBASECG	org.Dm.eg.db
+GENENAME	GENENAME	org.Dm.eg.db
+REFSEQ	REFSEQ	org.Dm.eg.db
+SYMBOL	SYMBOL	org.Dm.eg.db
+UNIGENE	UNIGENE	org.Dm.eg.db
+UNIPROT	UNIPROT	org.Dm.eg.db
+ACCNUM	ACCNUM	org.Dr.eg.db
+ENSEMBL	ENSEMBL	org.Dr.eg.db
+GENENAME	GENENAME	org.Dr.eg.db
+REFSEQ	REFSEQ	org.Dr.eg.db
+SYMBOL	SYMBOL	org.Dr.eg.db
+UNIGENE	UNIGENE	org.Dr.eg.db
+UNIPROT	UNIPROT	org.Dr.eg.db
+ACCNUM	ACCNUM	org.EcK12.eg.db
+GENENAME	GENENAME	org.EcK12.eg.db
+REFSEQ	REFSEQ	org.EcK12.eg.db
+SYMBOL	SYMBOL	org.EcK12.eg.db
+ACCNUM	ACCNUM	org.EcSakai.eg.db
+GENENAME	GENENAME	org.EcSakai.eg.db
+REFSEQ	REFSEQ	org.EcSakai.eg.db
+SYMBOL	SYMBOL	org.EcSakai.eg.db
+ACCNUM	ACCNUM	org.Gg.eg.db
+ENSEMBL	ENSEMBL	org.Gg.eg.db
+GENENAME	GENENAME	org.Gg.eg.db
+REFSEQ	REFSEQ	org.Gg.eg.db
+SYMBOL	SYMBOL	org.Gg.eg.db
+UNIGENE	UNIGENE	org.Gg.eg.db
+UNIPROT	UNIPROT	org.Gg.eg.db
+ACCNUM	ACCNUM	org.Hs.eg.db
+ENSEMBL	ENSEMBL	org.Hs.eg.db
+GENENAME	GENENAME	org.Hs.eg.db
+REFSEQ	REFSEQ	org.Hs.eg.db
+SYMBOL	SYMBOL	org.Hs.eg.db
+UNIGENE	UNIGENE	org.Hs.eg.db
+UNIPROT	UNIPROT	org.Hs.eg.db
+ACCNUM	ACCNUM	org.Mm.eg.db
+ENSEMBL	ENSEMBL	org.Mm.eg.db
+GENENAME	GENENAME	org.Mm.eg.db
+REFSEQ	REFSEQ	org.Mm.eg.db
+SYMBOL	SYMBOL	org.Mm.eg.db
+UNIGENE	UNIGENE	org.Mm.eg.db
+UNIPROT	UNIPROT	org.Mm.eg.db
+ACCNUM	ACCNUM	org.Mmu.eg.db
+ENSEMBL	ENSEMBL	org.Mmu.eg.db
+GENENAME	GENENAME	org.Mmu.eg.db
+REFSEQ	REFSEQ	org.Mmu.eg.db
+SYMBOL	SYMBOL	org.Mmu.eg.db
+UNIPROT	UNIPROT	org.Mmu.eg.db
+GENENAME	GENENAME	org.Pf.plasmo.db
+SYMBOL	SYMBOL	org.Pf.plasmo.db
+ACCNUM	ACCNUM	org.Pt.eg.db
+ENSEMBL	ENSEMBL	org.Pt.eg.db
+GENENAME	GENENAME	org.Pt.eg.db
+REFSEQ	REFSEQ	org.Pt.eg.db
+SYMBOL	SYMBOL	org.Pt.eg.db
+UNIPROT	UNIPROT	org.Pt.eg.db
+ACCNUM	ACCNUM	org.Rn.eg.db
+ENSEMBL	ENSEMBL	org.Rn.eg.db
+GENENAME	GENENAME	org.Rn.eg.db
+REFSEQ	REFSEQ	org.Rn.eg.db
+SYMBOL	SYMBOL	org.Rn.eg.db
+UNIGENE	UNIGENE	org.Rn.eg.db
+UNIPROT	UNIPROT	org.Rn.eg.db
+ENSEMBL	ENSEMBL	org.Sc.sgd.db
+GENENAME	GENENAME	org.Sc.sgd.db
+REFSEQ	REFSEQ	org.Sc.sgd.db
+UNIPROT	UNIPROT	org.Sc.sgd.db
+ACCNUM	ACCNUM	org.Ss.eg.db
+GENENAME	GENENAME	org.Ss.eg.db
+REFSEQ	REFSEQ	org.Ss.eg.db
+SYMBOL	SYMBOL	org.Ss.eg.db
+UNIGENE	UNIGENE	org.Ss.eg.db
+UNIPROT	UNIPROT	org.Ss.eg.db
+ACCNUM	ACCNUM	org.Tgondii.eg.db
+GENENAME	GENENAME	org.Tgondii.eg.db
+REFSEQ	REFSEQ	org.Tgondii.eg.db
+SYMBOL	SYMBOL	org.Tgondii.eg.db
+UNIGENE	UNIGENE	org.Tgondii.eg.db
+ACCNUM	ACCNUM	org.Xl.eg.db
+GENENAME	GENENAME	org.Xl.eg.db
+REFSEQ	REFSEQ	org.Xl.eg.db
+SYMBOL	SYMBOL	org.Xl.eg.db
+UNIGENE	UNIGENE	org.Xl.eg.db
+UNIPROT	UNIPROT	org.Xl.eg.db
Binary file tool-data/gene2pubmed.gz has changed
--- a/tool-data/gene_ids.loc.sample	Mon Mar 07 14:35:53 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-knownGene	knownGene, UCSC Genes, NA, Entrez Gene ID
-knownGeneOld3	knownGeneOld3, Old UCSC Genes, NA, 
-ccdsGene	ccdsGene, CCDS, NA, 
-refGene	refGene, RefSeq Genes, NA, Entrez Gene ID
-xenoRefGene	xenoRefGene, Other RefSeq, NA, 
-vegaGene	vegaGene, Vega Genes, Vega Protein Genes, HAVANA Pseudogene ID
-vegaPseudoGene	vegaPseudoGene, Vega Genes, Vega Pseudogenes, HAVANA Pseudogene ID
-ensGene	ensGene, Ensembl Genes, NA, Ensembl gene ID
-acembly	acembly, AceView Genes, NA, 
-sibGene	sibGene, SIB Genes, NA, 
-nscanPasaGene	nscanPasaGene, N-SCAN, N-SCAN PASA-EST, 
-nscanGene	nscanGene, N-SCAN, N-SCAN, 
-sgpGene	sgpGene, SGP Genes, NA, 
-geneid	geneid, Geneid Genes, NA, 
-genscan	genscan, Genscan Genes, NA, 
-exoniphy	exoniphy, Exoniphy, NA, 
-augustusHints	augustusHints, Augustus, Augustus Hints, 
-augustusXRA	augustusXRA, Augustus, Augustus De Novo, 
-augustusAbinitio	augustusAbinitio, Augustus, Augustus Ab Initio, 
-acescan	acescan, ACEScan, NA, 
-lincRNAsTranscripts	lincRNAsTranscripts, lincRNAsTranscripts, NA, Name of gene
-wgEncodeGencodeManualV3	wgEncodeGencodeManualV3, Gencode Genes, Gencode Manual, Ensembl gene ID
-wgEncodeGencodeAutoV3	wgEncodeGencodeAutoV3, Gencode Genes, Gencode Auto, Ensembl gene ID
-wgEncodeGencodePolyaV3	wgEncodeGencodePolyaV3, Gencode Genes, Gencode PolyA, Ensembl gene ID
-wgEncodeGencodeBasicV17	wgEncodeGencodeBasicV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodeCompV17	wgEncodeGencodeCompV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV17	wgEncodeGencodePseudoGeneV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV17	wgEncodeGencode2wayConsPseudoV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodePolyaV17	wgEncodeGencodePolyaV17, GENCODE Genes V17, NA, Ensembl gene ID
-wgEncodeGencodeBasicV14	wgEncodeGencodeBasicV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodeCompV14	wgEncodeGencodeCompV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV14	wgEncodeGencodePseudoGeneV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV14	wgEncodeGencode2wayConsPseudoV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodePolyaV14	wgEncodeGencodePolyaV14, GENCODE Genes V14, NA, Ensembl gene ID
-wgEncodeGencodeBasicV7	wgEncodeGencodeBasicV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodeCompV7	wgEncodeGencodeCompV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodePseudoGeneV7	wgEncodeGencodePseudoGeneV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencode2wayConsPseudoV7	wgEncodeGencode2wayConsPseudoV7, GENCODE Genes V7, NA, Ensembl gene ID
-wgEncodeGencodePolyaV7	wgEncodeGencodePolyaV7, GENCODE Genes V7, NA, Ensembl gene ID
-flyBaseGene	flyBaseGene, FlyBase Genes, NA, Name of canonical transcript in cluster
-sgdGene	sgdGene, SGD Genes, NA, Name of canonical transcript in cluster
-geneSymbol	geneSymbol, refGene, refFlat, Gene Symbol
--- a/tool-data/genomes.loc.sample	Mon Mar 07 14:35:53 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
-hg38	hg38, Human, Dec. 2013, Genome Reference Consortium GRCh38
-hg19	hg19, Human, Feb. 2009, Genome Reference Consortium GRCh37
-hg18	hg18, Human, Mar. 2006, NCBI Build 36.1
-hg17	hg17, Human, May 2004, NCBI Build 35
-hg16	hg16, Human, Jul. 2003, NCBI Build 34
-vicPac2	vicPac2, Alpaca, Mar. 2013, Broad Institute Vicugna_pacos-2.0.1
-vicPac1	vicPac1, Alpaca, Jul. 2008, Broad Institute VicPac1.0
-dasNov3	dasNov3, Armadillo, Dec. 2011, Broad Institute DasNov3
-otoGar3	otoGar3, Bushbaby, Mar. 2011, Broad Institute OtoGar3
-papHam1	papHam1, Baboon, Nov. 2008, Baylor College of Medicine HGSC Pham_1.0
-papAnu2	papAnu2, Baboon, Mar. 2012, Baylor College of Medicine Panu_2.0
-felCat8	felCat8, Cat, Nov. 2014, ICGSC Felis_catus_8.0
-felCat5	felCat5, Cat, Sep. 2011, ICGSC Felis_catus-6.2
-felCat4	felCat4, Cat, Dec. 2008, NHGRI catChrV17e
-felCat3	felCat3, Cat, Mar. 2006, Broad Institute Release 3
-panTro4	panTro4, Chimp, Feb. 2011, CGSC Build 2.1.4
-panTro3	panTro3, Chimp, Oct. 2010, CGSC Build 2.1.3
-panTro2	panTro2, Chimp, Mar. 2006, CGSC Build 2.1
-panTro1	panTro1, Chimp, Nov. 2003, CGSC Build 1.1
-criGri1	criGri1, Chinese hamster, Jul. 2013, Beijing Genomics Institution-Shenzhen C_griseus_v1.0
-bosTau8	bosTau8, Cow, Jun. 2014, University of Maryland v3.1.1
-bosTau7	bosTau7, Cow, Oct. 2011, Baylor College of Medicine HGSC Btau_4.6.1
-bosTau6	bosTau6, Cow, Nov. 2009, University of Maryland v3.1
-bosTau4	bosTau4, Cow, Oct. 2007, Baylor College of Medicine HGSC Btau_4.0
-bosTau3	bosTau3, Cow, Aug. 2006, Baylor College of Medicine HGSC Btau_3.1
-bosTau2	bosTau2, Cow, Mar. 2005, Baylor College of Medicine HGSC Btau_2.0
-canFam3	canFam3, Dog, Sep. 2011, Broad Institute v3.1
-canFam2	canFam2, Dog, May 2005, Broad Institute v2.0
-canFam1	canFam1, Dog, Jul. 2004, Broad Institute v1.0
-turTru2	turTru2, Dolphin, Oct. 2011, Baylor College of Medicine Ttru_1.4
-loxAfr3	loxAfr3, Elephant, Jul. 2009, Broad Institute LoxAfr3
-musFur1	musFur1, Ferret, Apr. 2011, Ferret Genome Sequencing Consortium MusPutFur1.0
-nomLeu3	nomLeu3, Gibbon, Oct. 2012, Gibbon Genome Sequencing Consortium Nleu3.0
-nomLeu2	nomLeu2, Gibbon, Jun. 2011, Gibbon Genome Sequencing Consortium Nleu1.1
-nomLeu1	nomLeu1, Gibbon, Jan. 2010, Gibbon Genome Sequencing Consortium Nleu1.0
-gorGor3	gorGor3, Gorilla, May 2011, Wellcome Trust Sanger Institute gorGor3.1
-cavPor3	cavPor3, Guinea pig, Feb. 2008, Broad Institute cavPor3
-eriEur2	eriEur2, Hedgehog, May 2012, Broad Institute EriEur2.0
-eriEur1	eriEur1, Hedgehog, Jun. 2006, Broad Institute Draft_v1
-equCab2	equCab2, Horse, Sep. 2007, Broad Institute EquCab2
-equCab1	equCab1, Horse, Jan. 2007, Broad Institute EquCab1
-dipOrd1	dipOrd1, Kangaroo rat, Jul. 2008, Baylor/Broad Institute DipOrd1.0
-triMan1	triMan1, Manatee, Oct. 2011, Broad Institute TriManLat1.0
-calJac3	calJac3, Marmoset, Mar. 2009, WUSTL Callithrix_jacchus-v3.2
-calJac1	calJac1, Marmoset, Jun. 2007, WUSTL Callithrix_jacchus-v2.0.2
-pteVam1	pteVam1, Megabat, Jul. 2008, Broad Institute Ptevap1.0
-myoLuc2	myoLuc2, Microbat, Jul. 2010, Broad Institute MyoLuc2.0
-balAcu1	balAcu1, Minke whale, Oct. 2013, KORDI BalAcu1.0
-mm10	mm10, Mouse, Dec. 2011, Genome Reference Consortium GRCm38
-mm9	mm9, Mouse, Jul. 2007, NCBI Build 37
-mm8	mm8, Mouse, Feb. 2006, NCBI Build 36
-mm7	mm7, Mouse, Aug. 2005, NCBI Build 35
-micMur2	micMur2, Mouse lemur, May 2015, Baylor/Broad Institute Mmur_2.0
-micMur1	micMur1, Mouse lemur, Jul. 2007, Broad Institute MicMur1.0
-hetGla2	hetGla2, Naked mole-rat, Jan. 2012, Broad Institute HetGla_female_1.0
-hetGla1	hetGla1, Naked mole-rat, Jul. 2011, Beijing Genomics Institute HetGla_1.0
-monDom5	monDom5, Opossum, Oct. 2006, Broad Institute release MonDom5
-monDom4	monDom4, Opossum, Jan. 2006, Broad Institute release MonDom4
-monDom1	monDom1, Opossum, Oct. 2004, Broad Institute release MonDom1
-ponAbe2	ponAbe2, Orangutan, Jul. 2007, WUSTL Pongo_albelii-2.0.2
-ailMel1	ailMel1, Panda, Dec. 2009, BGI-Shenzhen AilMel 1.0
-susScr3	susScr3, Pig, Aug. 2011, Swine Genome Sequencing Consortium Sscrofa10.2
-susScr2	susScr2, Pig, Nov. 2009, Swine Genome Sequencing Consortium Sscrofa9.2
-ochPri3	ochPri3, Pika, May 2012, Broad Institute OchPri3.0
-ochPri2	ochPri2, Pika, Jul. 2008, Broad Institute OchPri2
-ornAna2	ornAna2, Platypus, Feb. 2007, WUSTL v5.0.1
-ornAna1	ornAna1, Platypus, Mar. 2007, WUSTL v5.0.1
-oryCun2	oryCun2, Rabbit, Apr. 2009, Broad Institute release OryCun2
-rn6	rn6, Rat, Jul. 2014, RGSC Rnor_6.0
-rn5	rn5, Rat, Mar. 2012, RGSC Rnor_5.0
-rn4	rn4, Rat, Nov. 2004, Baylor College of Medicine HGSC v3.4
-rn3	rn3, Rat, Jun. 2003, Baylor College of Medicine HGSC v3.1
-rheMac3	rheMac3, Rhesus, Oct. 2010, Beijing Genomics Institute CR_1.0
-rheMac2	rheMac2, Rhesus, Jan. 2006, Baylor College of Medicine HGSC v1.0 Mmul_051212
-proCap1	proCap1, Rock hyrax, Jul. 2008, Baylor College of Medicine HGSC Procap1.0
-oviAri3	oviAri3, Sheep, Aug. 2012, ISGC Oar_v3.1
-oviAri1	oviAri1, Sheep, Feb. 2010, ISGC Ovis aries 1.0
-sorAra2	sorAra2, Shrew, Aug. 2008, Broad Institute SorAra2.0
-sorAra1	sorAra1, Shrew, Jun. 2006, Broad Institute SorAra1.0
-choHof1	choHof1, Sloth, Jul. 2008, Broad Institute ChoHof1.0
-speTri2	speTri2, Squirrel, Nov. 2011, Broad Institute SpeTri2.0
-saiBol1	saiBol1, Squirrel monkey, Oct. 2011, Broad Institute SaiBol1.0
-tarSyr2	tarSyr2, Tarsier, Sep. 2013, WashU Tarsius_syrichta-2.0.1
-tarSyr1	tarSyr1, Tarsier, Aug. 2008, WUSTL/Broad Institute Tarsyr1.0
-sarHar1	sarHar1, Tasmanian devil, Feb. 2011, Wellcome Trust Sanger Institute Devil_refv7.0
-echTel2	echTel2, Tenrec, Nov. 2012, Broad Institute EchTel2.0
-echTel1	echTel1, Tenrec, Jul. 2005, Broad Institute echTel1
-tupBel1	tupBel1, Tree shrew, Dec. 2006, Broad Institute Tupbel1.0
-macEug2	macEug2, Wallaby, Sep. 2009, Tammar Wallaby Genome Sequencing Consortium Meug_1.1
-cerSim1	cerSim1, White rhinoceros, May 2012, Broad Institute CerSimSim1.0
-allMis1	allMis1, American alligator, Aug. 2012, Int. Crocodilian Genomes Working Group allMis0.2
-gadMor1	gadMor1, Atlantic cod, May 2010, Genofisk GadMor_May2010
-melUnd1	melUnd1, Budgerigar, Sep. 2011, WUSTL v6.3
-galGal4	galGal4, Chicken, Nov.
-2011, ICGC Gallus-gallus-4.0
-galGal3	galGal3, Chicken, May 2006, WUSTL Gallus-gallus-2.1
-galGal2	galGal2, Chicken, Feb. 2004, WUSTL Gallus-gallus-1.0
-latCha1	latCha1, Coelacanth, Aug. 2011, Broad Institute LatCha1
-calMil1	calMil1, Elephant shark, Dec. 2013, IMCB Callorhinchus_milli_6.1.3
-fr3	fr3, Fugu, Oct. 2011, JGI v5.0
-fr2	fr2, Fugu, Oct. 2004, JGI v4.0
-fr1	fr1, Fugu, Aug. 2002, JGI v3.0
-petMar2	petMar2, Lamprey, Sep. 2010, WUGSC 7.0
-petMar1	petMar1, Lamprey, Mar. 2007, WUSTL v3.0
-anoCar2	anoCar2, Lizard, May 2010, Broad Institute AnoCar2
-anoCar1	anoCar1, Lizard, Feb. 2007, Broad Institute AnoCar1
-oryLat2	oryLat2, Medaka, Oct. 2005, NIG v1.0
-geoFor1	geoFor1, Medium ground finch, Apr. 2012, BGI GeoFor_1.0 / NCBI 13302
-oreNil2	oreNil2, Nile tilapia, Jan. 2011, Broad Institute Release OreNil1.1
-chrPic1	chrPic1, Painted turtle, Dec. 2011, IPTGSC Chrysemys_picta_bellii-3.0.1
-gasAcu1	gasAcu1, Stickleback, Feb. 2006, Broad Institute Release 1.0
-tetNig2	tetNig2, Tetraodon, Mar. 2007, Genoscope v7
-tetNig1	tetNig1, Tetraodon, Feb. 2004, Genoscope v7
-melGal1	melGal1, Turkey, Dec. 2009, Turkey Genome Consortium v2.01
-xenTro7	xenTro7, X. tropicalis, Sep. 2012, JGI v.7.0
-xenTro3	xenTro3, X. tropicalis, Nov. 2009, JGI v.4.2
-xenTro2	xenTro2, X. tropicalis, Aug. 2005, JGI v.4.1
-xenTro1	xenTro1, X. tropicalis, Oct. 2004, JGI v.3.0
-taeGut2	taeGut2, Zebra finch, Feb. 2013, WashU taeGut324
-taeGut1	taeGut1, Zebra finch, Jul. 2008, WUSTL v3.2.4
-danRer10	danRer10, Zebrafish, Sep. 2014, Genome Reference Consortium GRCz10 
-danRer7	danRer7, Zebrafish, Jul. 2010, Sanger Institute Zv9 
-danRer6	danRer6, Zebrafish, Dec. 2008, Sanger Institute Zv8 
-danRer5	danRer5, Zebrafish, Jul. 2007, Sanger Institute Zv7 
-danRer4	danRer4, Zebrafish, Mar. 2006, Sanger Institute Zv6 
-danRer3	danRer3, Zebrafish, May 2005, Sanger Institute Zv5 
-ci2	ci2, C. intestinalis, Mar. 2005, JGI v2.0
-ci1	ci1, C. intestinalis, Dec. 2002, JGI v1.0
-braFlo1	braFlo1, Lancelet, Mar. 2006, JGI v1.0
-strPur2	strPur2, S. purpuratus, Sep. 2006, Baylor College of Medicine HGSC v. Spur 2.1
-strPur1	strPur1, S. purpuratus, Apr. 2005, Baylor College of Medicine HGSC v. Spur_0.5
-apiMel2	apiMel2, A. mellifera, Jan. 2005, Baylor College of Medicine HGSC v.Amel_2.0 
-apiMel1	apiMel1, A. mellifera, Jul. 2004, Baylor College of Medicine HGSC v.Amel_1.2 
-anoGam1	anoGam1, A. gambiae, Feb. 2003, IAGP v.MOZ2
-droAna2	droAna2, D. ananassae, Aug. 2005, Agencourt Arachne release
-droAna1	droAna1, D. ananassae, Jul. 2004, TIGR Celera release
-droEre1	droEre1, D. erecta, Aug. 2005, Agencourt Arachne release
-droGri1	droGri1, D. grimshawi, Aug. 2005, Agencourt Arachne release
-dm6	dm6, D. melanogaster, Aug. 2014, BDGP Release 6 + ISO1 MT
-dm3	dm3, D. melanogaster, Apr. 2006, BDGP Release 5
-dm2	dm2, D. melanogaster, Apr. 2004, BDGP Release 4
-dm1	dm1, D. melanogaster, Jan. 2003, BDGP Release 3
-droMoj2	droMoj2, D. mojavensis, Aug. 2005, Agencourt Arachne release
-droMoj1	droMoj1, D. mojavensis, Aug. 2004, Agencourt Arachne release
-droPer1	droPer1, D. persimilis, Oct. 2005, Broad Institute release
-dp3	dp3, D. pseudoobscura, Nov. 2004, Flybase Release 1.0
-dp2	dp2, D. pseudoobscura, Aug. 2003, Baylor College of Medicine HGSC Freeze 1
-droSec1	droSec1, D. sechellia, Oct. 2005, Broad Institute Release 1.0
-droSim1	droSim1, D. simulans, Apr. 2005, WUSTL Release 1.0
-droVir2	droVir2, D. virilis, Aug. 2005, Agencourt Arachne release
-droVir1	droVir1, D. virilis, Jul. 2004, Agencourt Arachne release
-droYak2	droYak2, D. yakuba, Nov. 2005, WUSTL Release 2.0
-droYak1	droYak1, D. yakuba, Apr. 2004, WUSTL Release 1.0
-caePb2	caePb2, C. brenneri, Feb. 2008, WUSTL 6.0.1
-caePb1	caePb1, C. brenneri, Jan. 2007, WUSTL 4.0
-cb3	cb3, C. briggsae, Jan. 2007, WUSTL Cb3
-cb1	cb1, C. briggsae, Jul. 2002, WormBase v. cb25.agp8
-ce10	ce10, C. elegans, Oct. 2010, WormBase v. WS220
-ce6	ce6, C. elegans, May 2008, WormBase v. WS190
-ce4	ce4, C. elegans, Jan. 2007, WormBase v. WS170
-ce2	ce2, C. elegans, Mar. 2004, WormBase v. WS120
-caeJap1	caeJap1, C. japonica, Mar. 2008, WUSTL 3.0.2
-caeRem3	caeRem3, C. remanei, May 2007, WUSTL 15.0.1
-caeRem2	caeRem2, C. remanei, Mar. 2006, WUSTL 1.0
-priPac1	priPac1, P. pacificus, Feb. 2007, WUSTL 5.0
-aplCal1	aplCal1, Sea Hare, Sep. 2008, Broad Release Aplcal2.0
-sacCer3	sacCer3, Yeast, April 2011, SGD April 2011 sequence
-sacCer2	sacCer2, Yeast, June 2008, SGD June 2008 sequence
-sacCer1	sacCer1, Yeast, Oct. 2003, SGD 1 Oct 2003 sequence
-eboVir3	eboVir3, Ebola Virus, June 2014, Sierra Leone 2014 (G3683/KM034562.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/listing_available_identifiers_and_data_sources.r	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,34 @@
+# Use this script to generate the .loc.sample files with available organisms, available gene identifiers and available categories
+library("reshape2")
+if (!exists("biocLite")) source("https://bioconductor.org/biocLite.R")  # biocLite() is not part of base R
+# install all packages (names are read as character, not factor, so library() and get() accept them)
+packages = read.table("org_packages.tab", stringsAsFactors = FALSE)$V1
+biocLite(packages, dependencies = TRUE)
+# load all packages; library() stops on a missing package whereas require() would only return FALSE
+invisible(lapply(packages, library, character.only = TRUE))
+pkg_name = setNames(paste0("package:", packages), packages)  # search path entries, named by package
+prefixes = sub("\\.db$", "", packages)  # object name prefix, e.g. "org.Hs.eg" for "org.Hs.eg.db"
+# "<organism> (<package>)" labels, read from the <prefix>ORGANISM object of each attached package
+organism_names = paste0(vapply(paste0(prefixes, "ORGANISM"), get, character(1)), " (", packages, ")")
+org_name_tab = data.frame(organism_names, packages)
+identifiers = c( "GENENAME", "UNIGENE", "UNIPROT", "REFSEQ", "SYMBOL", "ENSEMBL", "FLYBASECG", "ACCNUM" )
+categories = c( "PMID", "ENZYME", "GO2ALLEGS", "PATH", "GO2ALLTAIRS", "GO2ALLORFS", "PFAM", "PROSITE" )
+
+# get dataframe suitable for galaxy's <filter></> tagset: object name (value) and owning package (L1)
+filter_tab = melt(lapply(pkg_name, ls))
+# strip the package prefix from the object names; dots are escaped so that they only match literally
+patterns = paste0("^(", paste(gsub(".", "\\.", prefixes, fixed = TRUE), collapse = "|"), ")")
+filter_tab[, 1] = sub(patterns, "", filter_tab[, 1])
+
+# keep the known identifiers/categories; the value is duplicated to serve as both name and value column
+available_identifiers = subset(filter_tab, value %in% identifiers)
+available_identifiers = cbind(available_identifiers[, 1], available_identifiers)
+available_categories = subset(filter_tab, value %in% categories)
+available_categories = cbind(available_categories[, 1], available_categories)
+# add the ENTREZ id format to the available_identifiers
+entrez = data.frame(rep("ENTREZ", length(packages)), rep("ENTREZ", length(packages)), packages)
+colnames(entrez) = colnames(available_identifiers)
+available_identifiers = rbind(entrez, available_identifiers)
+write.table(available_identifiers, file = "available_identifiers.loc.sample", sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
+write.table(available_categories, file = "available_categories.loc.sample", sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
+write.table(org_name_tab, file = "org_name.loc.sample", sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/org_name.loc.sample	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,20 @@
+Anopheles gambiae (org.Ag.eg.db)	org.Ag.eg.db
+Arabidopsis thaliana (org.At.tair.db)	org.At.tair.db
+Bos taurus (org.Bt.eg.db)	org.Bt.eg.db
+Caenorhabditis elegans (org.Ce.eg.db)	org.Ce.eg.db
+Canis familiaris (org.Cf.eg.db)	org.Cf.eg.db
+Drosophila melanogaster (org.Dm.eg.db)	org.Dm.eg.db
+Danio rerio (org.Dr.eg.db)	org.Dr.eg.db
+Escherichia coli (org.EcK12.eg.db)	org.EcK12.eg.db
+Escherichia coli (org.EcSakai.eg.db)	org.EcSakai.eg.db
+Gallus gallus (org.Gg.eg.db)	org.Gg.eg.db
+Homo sapiens (org.Hs.eg.db)	org.Hs.eg.db
+Mus musculus (org.Mm.eg.db)	org.Mm.eg.db
+Macaca mulatta (org.Mmu.eg.db)	org.Mmu.eg.db
+Plasmodium falciparum (org.Pf.plasmo.db)	org.Pf.plasmo.db
+Pan troglodytes (org.Pt.eg.db)	org.Pt.eg.db
+Rattus norvegicus (org.Rn.eg.db)	org.Rn.eg.db
+Saccharomyces cerevisiae (org.Sc.sgd.db)	org.Sc.sgd.db
+Sus scrofa (org.Ss.eg.db)	org.Ss.eg.db
+Toxoplasma gondii (org.Tgondii.eg.db)	org.Tgondii.eg.db
+Xenopus laevis (org.Xl.eg.db)	org.Xl.eg.db
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/org_packages.tab	Thu Mar 31 12:23:45 2016 -0400
@@ -0,0 +1,20 @@
+org.Ag.eg.db
+org.At.tair.db
+org.Bt.eg.db
+org.Ce.eg.db
+org.Cf.eg.db
+org.Dm.eg.db
+org.Dr.eg.db
+org.EcK12.eg.db
+org.EcSakai.eg.db
+org.Gg.eg.db
+org.Hs.eg.db
+org.Mm.eg.db
+org.Mmu.eg.db
+org.Pf.plasmo.db
+org.Pt.eg.db
+org.Rn.eg.db
+org.Sc.sgd.db
+org.Ss.eg.db
+org.Tgondii.eg.db
+org.Xl.eg.db
--- a/tool_data_table_conf.xml.sample	Mon Mar 07 14:35:53 2016 -0500
+++ b/tool_data_table_conf.xml.sample	Thu Mar 31 12:23:45 2016 -0400
@@ -1,11 +1,15 @@
 <tables>
-    <!-- Location of Picard dict file and other files -->
-    <table name="go_genomes" comment_char="#">
-        <columns>value, name</columns>
-        <file path="${__HERE__}/test-data/genomes.loc" />
+    <!-- Available organism packages and available categories for getgo tool -->
+    <table name="org_names" comment_char="#">
+        <columns>name, value</columns>
+        <file path="tool-data/org_name.loc.sample" />
     </table>
-    <table name="go_gene_ids" comment_char="#">
-        <columns>value, name</columns>
-        <file path="${__HERE__}/test-data/gene_ids.loc" />
+    <table name="available_categories" comment_char="#">
+        <columns>name, value, package</columns>
+        <file path="tool-data/available_categories.loc.sample" />
+    </table>
+    <table name="available_identifiers" comment_char="#">
+        <columns>name, value, package</columns>
+        <file path="tool-data/available_identifiers.loc.sample" />
     </table>
 </tables>
--- a/tool_data_table_conf.xml.sample.test	Mon Mar 07 14:35:53 2016 -0500
+++ b/tool_data_table_conf.xml.sample.test	Thu Mar 31 12:23:45 2016 -0400
@@ -1,11 +1,15 @@
 <tables>
-    <!-- Location of Picard dict file and other files -->
-    <table name="go_genomes" comment_char="#">
-        <columns>value, name</columns>
-        <file path="tool-data/genomes.loc" />
+    <!-- Available organism packages and available categories for getgo tool -->
+    <table name="org_names" comment_char="#">
+        <columns>name, value</columns>
+        <file path="${__HERE__}/test-data/org_name.loc" />
     </table>
-    <table name="go_gene_ids" comment_char="#">
-        <columns>value, name</columns>
-        <file path="tool-data/gene_ids.loc" />
+    <table name="available_categories" comment_char="#">
+        <columns>name, value, package</columns>
+        <file path="${__HERE__}/test-data/available_categories.loc" />
+    </table>
+    <table name="available_identifiers" comment_char="#">
+        <columns>name, value, package</columns>
+        <file path="${__HERE__}/test-data/available_identifiers.loc" />
     </table>
 </tables>
--- a/tool_dependencies.xml	Mon Mar 07 14:35:53 2016 -0500
+++ b/tool_dependencies.xml	Thu Mar 31 12:23:45 2016 -0400
@@ -4,6 +4,6 @@
          <repository changeset_revision="9f31a291b305" name="package_r_3_2_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="goseq" version="1.22.0">
-         <repository changeset_revision="4f43af0c6b44" name="package_r_3_2_1_goseq_1_22_0" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+         <repository changeset_revision="cc302cd9b007" name="package_r_3_2_1_goseq_1_22_0" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>