changeset 6:0e9424413ab0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
author mvdbeek
date Thu, 03 Mar 2016 09:56:51 -0500
parents b79c65c90744
children 9ffae7bc23c2
files get_length_and_gc_content.r get_length_and_gc_content.xml getgo.r getgo.xml go_macros.xml goseq.r goseq.xml test-data/category.tab test-data/dge_list.tab test-data/gc.tab test-data/gene_length.tab test-data/go_terms.tab test-data/in.fasta test-data/in.gtf test-data/length.tab test-data/wal.tab test-data/wall.tab
diffstat 16 files changed, 1260 insertions(+), 100 deletions(-) [+]
line wrap: on
line diff
--- a/get_length_and_gc_content.r	Sun Feb 28 11:52:10 2016 -0500
+++ b/get_length_and_gc_content.r	Thu Mar 03 09:56:51 2016 -0500
@@ -1,16 +1,23 @@
 # originally by Devon Ryan, https://www.biostars.org/p/84467/
-sink(stdout(), type = "message")
+
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# Force English messages so Galaxy does not crash with a UTF-8 error under non-English (e.g. German) locale settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
-library(GenomicRanges)
-library(rtracklayer)
-library(Rsamtools)
-library(optparse)
-library(data.table)
+suppressPackageStartupMessages({
+    library("GenomicRanges")
+    library("rtracklayer")
+    library("Rsamtools")
+    library("optparse")
+    library("data.table")
+})
 
 option_list <- list(
     make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."),
     make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."),
-    make_option(c("-o","--output"), type="character", default=FALSE, help="Output file with gene name, length and GC content.")
+    make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with gene name and length."),
+    make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with gene name and GC content.")
   )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
@@ -18,7 +25,8 @@
 
 GTFfile = args$gtf
 FASTAfile = args$fasta
-output_file = args$output
+length = args$length
+gc_content = args$gc_content
 
 #Load the annotation and reduce it
 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon")
@@ -41,9 +49,11 @@
     c(width, nGCs/width)
 }
 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length))
-output <- setDT(data.frame(output), keep.rownames = TRUE)[]
-colnames(output) <- c("#gene_id", "length", "GC")
+output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
+
 
-write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t")
+write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
+write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
+
 
 sessionInfo()
\ No newline at end of file
--- a/get_length_and_gc_content.xml	Sun Feb 28 11:52:10 2016 -0500
+++ b/get_length_and_gc_content.xml	Thu Mar 03 09:56:51 2016 -0500
@@ -1,9 +1,10 @@
-<tool id="length_and_gc_content" name="gene length and gc content from gtf file" version="0.1.0">
-    <description />
-    <requirements>
-        <requirement type="package" version="3.2.1">R</requirement>
-        <requirement type="package" version="1.22.0">goseq</requirement>
-    </requirements>
+<tool id="length_and_gc_content" name="gene length and gc content" version="0.1.0">
+    <description>from GTF file</description>
+    <macros>
+        <import>go_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
     <command interpreter="Rscript">
         get_length_and_gc_content.r --gtf "$gtf"
         #if $fastaSource.genomeSource == "indexed":
@@ -11,10 +12,11 @@
         #else:
             --fasta "$fastaSource.fasta_history"
         #end if
-        --output "$output"
+        --length "$length"
+        --gc_content "$gc_content"
     </command>
     <inputs>
-        <param help="The GTF must match the FASTA file" label="GTF file for length and GC calculation" name="gtf" type="data" />
+        <param help="The GTF must match the FASTA file" label="GTF file for length and GC calculation" name="gtf" type="data" format="gtf" />
         <conditional name="fastaSource">
             <param help="choose history if you don't see the correct genome fasta" label="Select a reference fasta from your history or use a built-in fasta?" name="genomeSource" type="select">
                 <option value="indexed">Use a built-in fasta</option>
@@ -26,19 +28,40 @@
             </param>
         </when>
         <when value="history">
-            <param format="fasta" label="Select a fasta file, to serve as index reference" name="fasta_history" type="data" />
+            <param format="fasta" label="Select a fasta file that matches the supplied GTF file" name="fasta_history" type="data" />
         </when>
         </conditional>
     </inputs>
     <outputs>
-        <data format="tabular" label="length and gc content" name="output" />
+        <data format="tabular" label="gene length" name="length">
+            <!-- future: set this when 16.04 has been released
+            <actions>
+                <action name="column_names" type="metadata" default="gene,length" />
+            </actions>
+            -->
+        </data>
+        <data format="tabular" label="gene gc content" name="gc_content">
+            <!-- future: set this when 16.04 has been released
+             <actions>
+                <action name="column_names" type="metadata" default="gene,gc_content" />
+            </actions>
+            -->
+        </data>
     </outputs>
-    <tests></tests>
+    <tests>
+        <test>
+            <param name="gtf" value="in.gtf" ftype="gtf"></param>
+            <param name="fastaSource|genomeSource" value="history"></param>
+            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>
+            <output name="length" file="length.tab"></output>
+            <output name="gc_content" file="gc.tab"></output>
+        </test>
+    </tests>
     <help>
 
         **What it does**
 
-        Returns a tabular file with gene name, length and GC content, based on a supplied GTF and a FASTA file.
+        Returns a tabular file with gene id and length and a tabular file with gene id and GC content, based on a supplied GTF and a FASTA file.
 
 
         </help>
--- a/getgo.r	Sun Feb 28 11:52:10 2016 -0500
+++ b/getgo.r	Thu Mar 03 09:56:51 2016 -0500
@@ -1,7 +1,15 @@
-suppressWarnings(suppressMessages(library(goseq)))
-suppressWarnings(suppressMessages(library(optparse)))
-suppressWarnings(suppressMessages(library(rtracklayer)))
-suppressWarnings(suppressMessages(library(reshape2)))
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# Force English messages so Galaxy does not crash with a UTF-8 error under non-English (e.g. German) locale settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library("goseq")
+    library("optparse")
+    library("rtracklayer")
+    library("reshape2")
+})
+
 sink(stdout(), type = "message")
 
 option_list <- list(
@@ -15,21 +23,19 @@
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
 args = parse_args(parser)
 
-# Vars:
+# vars
 
 gtf = args$gtf
 genome = args$genome
 gene_id = args$gene_id
 output = args$output
 cats = unlist(strsplit(args$cats, ','))
+
+# retrieve and transform data
 genes = unique(import.gff(gtf)$gene_id)
-go_categories = getgo(genes, genome, id, fetch.cats=cats)
-
-# transform go category list to sth. more manipulatable in galaxy
-go_categories <- lapply(go_categories, unlist)
+go_categories = getgo(genes, genome, gene_id, fetch.cats=cats)
 go_categories = goseq:::reversemapping(go_categories)
 go_categories = melt(go_categories)
-colnames(go_categories) = c("#gene_id", "go_category")
 
-write.table(go_categories, output, sep="\t", row.names = FALSE, quote = FALSE)
+write.table(go_categories, output, sep="\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
 sessionInfo()
\ No newline at end of file
--- a/getgo.xml	Sun Feb 28 11:52:10 2016 -0500
+++ b/getgo.xml	Thu Mar 03 09:56:51 2016 -0500
@@ -1,9 +1,10 @@
-<tool id="getgo" name="Retrieve GO ontologies" version="0.1.0">
-    <description />
-    <requirements>
-        <requirement type="package" version="3.2.1">R</requirement>
-        <requirement type="package" version="1.22.0">goseq</requirement>
-    </requirements>
+<tool id="getgo" name="getgo" version="0.1.0">
+    <description>downloads gene ontologies for model organisms</description>
+    <macros>
+        <import>go_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
     <command interpreter="Rscript">
         getgo.r --genome "$genome"
         --gtf "$gtf"
@@ -29,15 +30,24 @@
     <outputs>
         <data format="tabular" label="GO category mapping" name="output" />
     </outputs>
-    <tests></tests>
+    <tests>
+        <test>
+            <param name="gtf" value="in.gtf" ftype="gtf"></param>
+            <param name="genome" value="hg38"></param>
+            <param name="gene_id" value="ensGene"></param>
+            <param name="cats" value="GO:CC,GO:BP,GO:MF"></param>
+            <output name="output" file="go_terms.tab"></output>
+        </test>
+    </tests>
     <help>
 
         **What it does**
 
-        Returns a tabular file with GO gene categories.
+        Returns a tabular file with GO gene categories for all genes present in the input GTF file.
 
 
         </help>
     <citations>
+        <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
     </citations>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/go_macros.xml	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,24 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.2.1">R</requirement>
+            <requirement type="package" version="1.22.0">goseq</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <regex match="Execution halted"
+                   source="both"
+                   level="fatal"
+                   description="Execution halted." />
+            <regex match="Error in"
+                   source="both"
+                   level="fatal"
+                   description="An undefined error occurred, please check your input carefully and contact your administrator." />
+            <regex match="Fatal error"
+                   source="both"
+                   level="fatal"
+                   description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        </stdio>
+    </xml>
+</macros>
\ No newline at end of file
--- a/goseq.r	Sun Feb 28 11:52:10 2016 -0500
+++ b/goseq.r	Thu Mar 03 09:56:51 2016 -0500
@@ -1,6 +1,12 @@
-sink(stdout(), type = "message")
-suppressWarnings(suppressMessages(library(goseq)))
-suppressWarnings(suppressMessages(library(optparse)))
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# Force English messages so Galaxy does not crash with a UTF-8 error under non-English (e.g. German) locale settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library("goseq")
+    library("optparse")
+})
 
 option_list <- list(
     make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"),
@@ -14,16 +20,20 @@
                 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"),
     make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
     make_option(c("-lf", "--length_file"), type="character", default="FALSE", help = "Path to tabular file mapping gene id to length"),
-    make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"),
-    make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file"),
-    make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical", help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated.  If thisoption is set to FALSE, those genes will be ignored in the calculation of p-values(default behaviour).  If this option is set to TRUE, then these genes will count towards  the  total  number  of  genes  outside  the  category  being  tested  (default behaviour prior to version 1.15.2)."
-)
-  )
+    make_option(c("-cat_file", "--category_file"), default="FALSE", type="character", help = "Path to tabular file with gene_id <-> category mapping."),
+    make_option(c("-g", "--genome"), default=NULL, type="character", help = "Genome [used for looking up correct gene length]"),
+    make_option(c("-i", "--gene_id"), default=NULL, type="character", help = "Gene ID format of genes in DGE file"),
+    make_option(c("-cat", "--use_genes_without_cat"), default=FALSE, type="logical",
+                help="A large number of gene may have no GO term annotated. If this option is set to FALSE, genes without category will be ignored in the calculation of p-values(default behaviour). If TRUE these genes will count towards the total number of genes outside the tested category (default behaviour prior to version 1.15.2)."),
+    make_option(c("-plots", "--make_plots"), default=FALSE, type="logical", help="produce diagnostic plots?")
+    )
+
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
 args = parse_args(parser)
 
 # Vars:
 dge_file = args$dge_file
+category_file = args$category_file
 p_adj_column = args$p_adj_colum
 p_adj_cutoff = args$p_adj_cutoff
 length_file = args$length_file
@@ -36,45 +46,72 @@
 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot
 repcnt = args$repcnt
 use_genes_without_cat = args$use_genes_without_cat
+make_plots = args$make_plots
 
-# format DE genes into vector suitable for use with goseq
-dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE)
+# format DE genes into named vector suitable for goseq
+first_line = read.delim(dge_file, header = FALSE, nrow=1)
+# check if header [character where numeric is expected]
+if (is.numeric(first_line[,p_adj_column])) {
+  dge_table = read.delim(dge_file, header = FALSE, sep="\t")
+  } else {
+  dge_table = read.delim(dge_file, header = TRUE, sep="\t")
+  }
+
 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff)
 names(genes) = dge_table[,1] # Assuming first row contains gene names
 
-# Get gene lengths
+# gene lengths: taken from the last column of length_file; its own first line is sniffed to decide whether it has a header
 if (length_file != "FALSE" ) {
-  length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE)
+  first_line = read.delim(length_file, header = FALSE, nrow=1)
+  if (is.numeric(first_line[, ncol(first_line)])) {
+    length_table = read.delim(length_file, header=FALSE, sep="\t", check.names=FALSE)
+    } else {
+    length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE)
+    }
   row.names(length_table) = length_table[,1]
-  gene_lengths = length_table[names(genes),]$length
+  gene_lengths = length_table[names(genes),][,ncol(length_table)]
   } else {
   gene_lengths = getlength(names(genes), genome, gene_id)
   }
 
 # Estimate PWF
 
-pdf(length_bias_plot)
-pwf=nullp(genes, genome, gene_id, gene_lengths)
-message = dev.off()
+if (make_plots == TRUE) {
+  pdf(length_bias_plot)
+}
+pwf=nullp(genes, genome = genome, id = gene_id, bias.data = gene_lengths, plot.fit=make_plots)
+graphics.off()
 
-# Fetch GO annotations:
-go_map=getgo(names(genes), genome, gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG"))
+# Fetch GO annotations if category_file hasn't been supplied:
+if (category_file == "FALSE") {
+  go_map=getgo(genes = names(genes), genome = genome, id = gene_id, fetch.cats=c("GO:CC", "GO:BP", "GO:MF", "KEGG"))
+  } else {
+  # check for header: first entry in first column must be present in genes, else it's a header
+  first_line = read.delim(category_file, header = FALSE, nrow=1)
+  if (first_line[,1] %in% names(genes)) {
+     go_map = read.delim(category_file, header = FALSE)
+     } else {
+     go_map = read.delim(category_file, header= TRUE)
+    }
+}
 
 # wallenius approximation of p-values
-GO.wall=goseq(pwf, genome, gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
+GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
 
-GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
+GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
 
 # Sampling distribution
 if (repcnt > 0) {
-  GO.samp=goseq(pwf,genome, gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
+  GO.samp=goseq(pwf, genome = genome, id = gene_id, method="Sampling", repcnt=repcnt, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
   # Compare sampling with wallenius
+  if (make_plots == TRUE) {
   pdf(sample_vs_wallenius_plot)
   plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]),
      xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)",
      xlim=c(-3,0))
      abline(0,1,col=3,lty=2)
-  message = dev.off()
+  graphics.off()
+  }
   write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
 }
 
--- a/goseq.xml	Sun Feb 28 11:52:10 2016 -0500
+++ b/goseq.xml	Thu Mar 03 09:56:51 2016 -0500
@@ -1,18 +1,16 @@
-<tool id="goseq" name="goseq gene ontology analyser" version="0.1.5">
-    <description />
-    <requirements>
-        <requirement type="package" version="3.2.1">R</requirement>
-        <requirement type="package" version="1.22.0">goseq</requirement>
-    </requirements>
+<tool id="goseq" name="goseq" version="0.2.0">
+    <description>tests for overrepresented gene categories</description>
+    <macros>
+        <import>go_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
     <command interpreter="Rscript">
         goseq.r --dge_file "$dge_file"
         --p_adj_column "$p_adj_column"
         --cutoff "$p_adj_cutoff"
-        #if $source.use_length_file == "yes":
         --length_file "$length_file"
-        #end if
-        --genome "$genome"
-        --gene_id "$gene_id"
+        --category_file "$category_file"
         --wallenius_tab "$wallenius_tab"
         --sampling_tab "$sampling_tab"
         --nobias_tab "$nobias_tab"
@@ -20,36 +18,26 @@
         --sample_vs_wallenius_plot "$sample_vs_wallenius_plot"
         --repcnt "$repcnt"
         --use_genes_without_cat "$use_genes_without_cat"
+        --make_plots "$make_plots"
     </command>
     <inputs>
         <param help="deseq2/edger/limma differential gene expression list" label="DGE list" name="dge_file" type="data" format="tabular" />
         <param help="Select the column that contains the multiple-testing corrected p-value" label="p adjust column" name="p_adj_column" type="data_column" numeric="true" data_ref="dge_file"/>
-        <param help="A boolean to indicate whether genes without a categorie should still be used. For example, a large number of gene may have no GO term annotated.  If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"
-               name="use_genes_without_cat" label="Count genes without any category" type="boolean"/>
+        <param label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" name="length_file" type="data" format="tabular" required="true" />
+        <param label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" name="category_file" type="data" format="tabular" required="true" />
+        <param help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"
+               name="use_genes_without_cat" label="Count genes without any category?" type="boolean"/>
         <param help="Typically 0.05 after multiple testing correction" max="1" label="Minimum p adjust value to consider genes as differentially expressed" name="p_adj_cutoff" type="float" value="0.05" />
-        <conditional name="source">
-            <param help="This is needed if the gene length is not available in goseq. e.g. hg38 and mm10." label="Use gene length file?" name="use_length_file" type="select">
-                <option value="no">no</option>
-                <option value="yes">yes</option>
-            </param>
-            <when value="yes">
-                <param label="Gene length file" name="length_file" type="data" format="tabular" required="false" />
-            </when>
-            <when value="no">
-            </when>
-        </conditional>
-        <param help="Needed to retrieve gene length for length correction" label="Select the genome source" name="genome" size="3" type="select">
-            <options from_data_table="go_genomes"></options>
-        </param>
-        <param help="Needed for GO analysis" label="Select gene identifier" name="gene_id" type="select">
-            <options from_data_table="go_gene_ids"></options>
-        </param>
+        <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param>
         <param help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" name="repcnt" size="3" type="integer" min="0" max="10000" value="0" />
     </inputs>
     <outputs>
-        <data format="pdf" label="length bias plot" name="length_bias_plot" />
+        <data format="pdf" label="length bias plot" name="length_bias_plot">
+            <filter>make_plots</filter>
+        </data>
         <data format="pdf" label="Plot P-value from sampling against wallenius distribution" name="sample_vs_wallenius_plot">
             <filter>repcnt != 0</filter>
+            <filter>make_plots</filter>
         </data>
         <data format="tabular" label="Ranked category list - no length bias correction" name="nobias_tab" />
         <data format="tabular" label="Ranked category list - sampling" name="sampling_tab">
@@ -60,21 +48,32 @@
     <tests>
         <test>
             <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
-            <param name="use_length_file" value="no" />
+            <param name="length_file" value="gene_length.tab" ftype="tabular"/>
+            <param name="category_file" value="category.tab" ftype="tabular"/>
+            <param name="use_genes_without_cat" value="true" />
             <param name="p_adj_column" value="2" />
-            <param name="genome" value="hg19" />
-            <param name="gene_id" value="ensGene" />
-            <output name="wallenius_tab" file="wall.tab" compare="contains"/>/>
+            <output name="wallenius_tab" file="wal.tab" compare="contains"/>/>
         </test>
     </tests>
     <help>
 
         **What it does**
 
-        Detects Gene Ontology and/or other user defined categories which are over/under represented in RNA-seq data.
+        Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
 
         Options map closely to the excellent manual_
 
+
+        **Input files**
+
+        goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes.
+        The format of this file is tabular, with gene_id in the first column and length in the second column.
+        The "get length and gc content" tool can produce such a file.
+
+        You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
+        category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve
+        gene ontologies for model organisms, or you can construct your own file.
+
         .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/category.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,6 @@
+ENSG00000162526	GO:0000003
+ENSG00000198648	GO:0000278
+ENSG00000112312	GO:0000278
+ENSG00000174442	GO:0000278
+ENSG00000108953	GO:0000278
+ENSG00000167842	GO:0000278
--- a/test-data/dge_list.tab	Sun Feb 28 11:52:10 2016 -0500
+++ b/test-data/dge_list.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -1,4 +1,3 @@
-Name	adj.p.value
 ENSG00000140459	0.72654265501997
 ENSG00000236824	0.621241793648661
 ENSG00000162526	0.104072112354657
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gc.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,1 @@
+ENSG00000162526	0.388349514563107
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_length.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,948 @@
+ENSG00000003096	6983
+ENSG00000004534	7302
+ENSG00000006327	1848
+ENSG00000006831	5878
+ENSG00000006837	3057
+ENSG00000007392	6176
+ENSG00000008735	5901
+ENSG00000009844	7262
+ENSG00000010322	9161
+ENSG00000010932	3602
+ENSG00000011638	2558
+ENSG00000012983	7620
+ENSG00000013275	2387
+ENSG00000014216	7553
+ENSG00000018408	8413
+ENSG00000018607	1774
+ENSG00000018699	3185
+ENSG00000022556	6313
+ENSG00000023041	3748
+ENSG00000023330	2536
+ENSG00000023697	2844
+ENSG00000023892	2545
+ENSG00000027697	2571
+ENSG00000029363	9297
+ENSG00000032389	5775
+ENSG00000033050	4448
+ENSG00000035403	9992
+ENSG00000042445	4005
+ENSG00000049541	2211
+ENSG00000057608	4033
+ENSG00000057935	10580
+ENSG00000059122	8419
+ENSG00000059588	7743
+ENSG00000063015	5115
+ENSG00000063322	3835
+ENSG00000064545	3281
+ENSG00000065000	8601
+ENSG00000065060	9908
+ENSG00000066739	14778
+ENSG00000066923	8583
+ENSG00000068028	3683
+ENSG00000068650	13733
+ENSG00000069712	4556
+ENSG00000070495	5969
+ENSG00000070610	4803
+ENSG00000070961	9493
+ENSG00000071889	5098
+ENSG00000072071	8487
+ENSG00000072121	15706
+ENSG00000072134	8517
+ENSG00000072864	5525
+ENSG00000072958	14371
+ENSG00000073614	12106
+ENSG00000074054	11052
+ENSG00000074071	1097
+ENSG00000074211	9198
+ENSG00000074319	3839
+ENSG00000074621	9084
+ENSG00000075399	3428
+ENSG00000076356	13781
+ENSG00000079215	6265
+ENSG00000079246	5463
+ENSG00000079785	3833
+ENSG00000079974	5870
+ENSG00000080603	13674
+ENSG00000080815	9947
+ENSG00000081087	5325
+ENSG00000082068	7039
+ENSG00000083535	4253
+ENSG00000083544	6814
+ENSG00000083720	4194
+ENSG00000084073	3572
+ENSG00000085365	5106
+ENSG00000085377	3762
+ENSG00000085982	9352
+ENSG00000085999	3212
+ENSG00000086205	3824
+ENSG00000086289	2864
+ENSG00000087586	2928
+ENSG00000088340	9067
+ENSG00000088448	4563
+ENSG00000089009	4447
+ENSG00000090020	5974
+ENSG00000090273	2856
+ENSG00000090402	6138
+ENSG00000091140	5299
+ENSG00000092068	5991
+ENSG00000092098	4982
+ENSG00000092208	2532
+ENSG00000092445	10945
+ENSG00000099139	12705
+ENSG00000099910	4646
+ENSG00000100014	7324
+ENSG00000100027	4713
+ENSG00000100038	6641
+ENSG00000100106	12238
+ENSG00000100191	2030
+ENSG00000100292	2405
+ENSG00000100336	4377
+ENSG00000100354	19998
+ENSG00000100441	8015
+ENSG00000100478	7223
+ENSG00000100526	1836
+ENSG00000100577	8477
+ENSG00000100852	10888
+ENSG00000101247	7217
+ENSG00000101294	10000
+ENSG00000101473	4213
+ENSG00000102030	4742
+ENSG00000102349	9022
+ENSG00000102606	11768
+ENSG00000102804	8586
+ENSG00000102901	5539
+ENSG00000103035	2740
+ENSG00000103121	12781
+ENSG00000103932	7046
+ENSG00000104325	3507
+ENSG00000104331	7594
+ENSG00000104368	6618
+ENSG00000104450	5055
+ENSG00000105173	2550
+ENSG00000105220	9112
+ENSG00000105223	5184
+ENSG00000105325	5802
+ENSG00000105355	2813
+ENSG00000105438	2251
+ENSG00000105519	4428
+ENSG00000105568	7437
+ENSG00000105879	5481
+ENSG00000106012	9240
+ENSG00000106305	1838
+ENSG00000106683	6902
+ENSG00000106771	9484
+ENSG00000106789	5717
+ENSG00000106803	1190
+ENSG00000106868	4183
+ENSG00000106948	10601
+ENSG00000107295	2682
+ENSG00000107833	928
+ENSG00000108055	4275
+ENSG00000108091	7345
+ENSG00000108306	11038
+ENSG00000108591	7411
+ENSG00000108666	5740
+ENSG00000108848	8151
+ENSG00000108947	3222
+ENSG00000108953	3847
+ENSG00000108960	3177
+ENSG00000109079	3889
+ENSG00000109171	6524
+ENSG00000109610	2128
+ENSG00000109680	3434
+ENSG00000109771	7360
+ENSG00000109787	6297
+ENSG00000109920	7596
+ENSG00000109929	5566
+ENSG00000110002	6403
+ENSG00000110092	4830
+ENSG00000110906	9865
+ENSG00000111247	2558
+ENSG00000111249	7648
+ENSG00000111331	8251
+ENSG00000111652	3113
+ENSG00000111707	5731
+ENSG00000111860	9462
+ENSG00000111877	12496
+ENSG00000112062	6860
+ENSG00000112306	767
+ENSG00000112312	2476
+ENSG00000112365	5519
+ENSG00000112406	5614
+ENSG00000112531	17368
+ENSG00000112874	4304
+ENSG00000113048	6511
+ENSG00000113328	3096
+ENSG00000113621	5265
+ENSG00000113649	8714
+ENSG00000113812	4066
+ENSG00000113916	5938
+ENSG00000114026	8733
+ENSG00000114120	9277
+ENSG00000114315	2062
+ENSG00000114416	12054
+ENSG00000114735	19425
+ENSG00000114999	15732
+ENSG00000115107	4562
+ENSG00000115221	6357
+ENSG00000115464	18125
+ENSG00000115841	5999
+ENSG00000116127	14054
+ENSG00000116171	8305
+ENSG00000116209	9149
+ENSG00000116455	2805
+ENSG00000116580	11967
+ENSG00000116604	6400
+ENSG00000116685	3281
+ENSG00000116761	2414
+ENSG00000116977	9710
+ENSG00000117262	5557
+ENSG00000117410	3535
+ENSG00000117505	10949
+ENSG00000117569	12937
+ENSG00000117758	3846
+ENSG00000117868	7098
+ENSG00000117984	2999
+ENSG00000118096	3814
+ENSG00000118420	6394
+ENSG00000118503	5051
+ENSG00000118507	4394
+ENSG00000118564	4573
+ENSG00000118620	6845
+ENSG00000118655	3940
+ENSG00000118680	1761
+ENSG00000118707	3784
+ENSG00000118873	10344
+ENSG00000119041	7105
+ENSG00000119326	3131
+ENSG00000119421	844
+ENSG00000119669	4157
+ENSG00000119686	4913
+ENSG00000119723	6686
+ENSG00000120158	4215
+ENSG00000120217	4108
+ENSG00000120907	5848
+ENSG00000121390	3223
+ENSG00000121753	7756
+ENSG00000121851	1799
+ENSG00000121988	9115
+ENSG00000122194	6380
+ENSG00000122390	5639
+ENSG00000122644	3561
+ENSG00000122691	2033
+ENSG00000122966	13230
+ENSG00000123384	20839
+ENSG00000123472	5677
+ENSG00000123562	2648
+ENSG00000124103	1137
+ENSG00000124120	7044
+ENSG00000124181	9648
+ENSG00000124275	6498
+ENSG00000124496	7732
+ENSG00000124549	4177
+ENSG00000124574	8083
+ENSG00000124615	5174
+ENSG00000124783	11349
+ENSG00000125637	13442
+ENSG00000125691	5979
+ENSG00000125733	3982
+ENSG00000125734	3213
+ENSG00000125818	5296
+ENSG00000125901	1127
+ENSG00000125944	8676
+ENSG00000126001	16295
+ENSG00000126003	5705
+ENSG00000126226	6243
+ENSG00000126562	5089
+ENSG00000126858	6372
+ENSG00000127314	16942
+ENSG00000127399	2735
+ENSG00000127989	4890
+ENSG00000128016	2010
+ENSG00000128590	2837
+ENSG00000128604	4375
+ENSG00000128654	1717
+ENSG00000128731	20027
+ENSG00000128739	2919
+ENSG00000128965	2058
+ENSG00000128973	4225
+ENSG00000129354	2214
+ENSG00000129518	2030
+ENSG00000129559	3879
+ENSG00000129625	4230
+ENSG00000129932	2387
+ENSG00000130222	1322
+ENSG00000130227	6909
+ENSG00000130305	3414
+ENSG00000130349	2036
+ENSG00000130592	5399
+ENSG00000130695	5422
+ENSG00000130717	2688
+ENSG00000130770	2225
+ENSG00000130787	7102
+ENSG00000130827	11710
+ENSG00000131051	9307
+ENSG00000131100	3212
+ENSG00000131467	5152
+ENSG00000131759	4731
+ENSG00000132155	7847
+ENSG00000132274	6020
+ENSG00000132294	6106
+ENSG00000132604	5621
+ENSG00000132823	1712
+ENSG00000132879	3527
+ENSG00000133114	10842
+ENSG00000133138	9503
+ENSG00000133226	8168
+ENSG00000133302	7185
+ENSG00000133678	2996
+ENSG00000133816	15809
+ENSG00000133872	2990
+ENSG00000134109	6758
+ENSG00000134153	1538
+ENSG00000134291	3475
+ENSG00000134324	11861
+ENSG00000134326	4302
+ENSG00000134461	3059
+ENSG00000134463	2185
+ENSG00000134690	2469
+ENSG00000134765	4271
+ENSG00000134852	11762
+ENSG00000134905	10460
+ENSG00000134986	9337
+ENSG00000134987	7656
+ENSG00000135045	2327
+ENSG00000135048	9625
+ENSG00000135124	8159
+ENSG00000135250	6341
+ENSG00000135535	6944
+ENSG00000135537	8450
+ENSG00000135605	4030
+ENSG00000135679	13273
+ENSG00000135698	2190
+ENSG00000135930	7177
+ENSG00000136048	4180
+ENSG00000136238	3114
+ENSG00000136709	14580
+ENSG00000136720	4683
+ENSG00000136819	5099
+ENSG00000137210	4815
+ENSG00000137285	2194
+ENSG00000137642	19449
+ENSG00000137776	8157
+ENSG00000137806	1790
+ENSG00000137812	10250
+ENSG00000137824	5114
+ENSG00000137965	2038
+ENSG00000138028	4039
+ENSG00000138459	4853
+ENSG00000138587	2504
+ENSG00000138594	10632
+ENSG00000138685	6775
+ENSG00000138750	4062
+ENSG00000138798	6127
+ENSG00000138835	13793
+ENSG00000139192	3188
+ENSG00000139200	3266
+ENSG00000139233	7993
+ENSG00000139496	10422
+ENSG00000139718	8314
+ENSG00000139726	3576
+ENSG00000139910	10046
+ENSG00000139971	7565
+ENSG00000140265	6634
+ENSG00000140391	8506
+ENSG00000140396	10026
+ENSG00000140416	12492
+ENSG00000140459	3491
+ENSG00000141012	8402
+ENSG00000141179	5880
+ENSG00000141367	11836
+ENSG00000141404	8445
+ENSG00000141425	8442
+ENSG00000141448	3945
+ENSG00000141510	3936
+ENSG00000141699	5145
+ENSG00000141736	10321
+ENSG00000141756	4273
+ENSG00000142082	5202
+ENSG00000142208	11162
+ENSG00000142599	11503
+ENSG00000142676	1987
+ENSG00000142784	6024
+ENSG00000142871	2455
+ENSG00000142961	5916
+ENSG00000143198	8401
+ENSG00000143256	677
+ENSG00000143337	9615
+ENSG00000143401	3605
+ENSG00000143418	4644
+ENSG00000143458	9189
+ENSG00000143641	7743
+ENSG00000143727	4160
+ENSG00000143816	3971
+ENSG00000143919	3991
+ENSG00000143970	13023
+ENSG00000144524	6066
+ENSG00000145354	2607
+ENSG00000145494	1451
+ENSG00000145555	13963
+ENSG00000145604	3837
+ENSG00000145736	8676
+ENSG00000145990	10686
+ENSG00000146066	668
+ENSG00000146350	6912
+ENSG00000146425	2949
+ENSG00000146574	7316
+ENSG00000146733	2762
+ENSG00000146802	7995
+ENSG00000147050	7139
+ENSG00000147251	6721
+ENSG00000147364	11438
+ENSG00000147394	4812
+ENSG00000147439	5072
+ENSG00000147669	1711
+ENSG00000147684	2629
+ENSG00000148158	11578
+ENSG00000148334	3039
+ENSG00000148408	9897
+ENSG00000148484	4191
+ENSG00000148700	6541
+ENSG00000148834	1691
+ENSG00000149292	5302
+ENSG00000149308	7534
+ENSG00000149483	6348
+ENSG00000149679	3785
+ENSG00000150779	1166
+ENSG00000150991	3898
+ENSG00000151623	6607
+ENSG00000151657	6844
+ENSG00000151690	6031
+ENSG00000151718	9966
+ENSG00000151743	3195
+ENSG00000151748	5630
+ENSG00000151876	3014
+ENSG00000152270	11707
+ENSG00000152377	6174
+ENSG00000152457	6326
+ENSG00000152527	9835
+ENSG00000152642	4802
+ENSG00000152782	7638
+ENSG00000153130	6294
+ENSG00000153147	8385
+ENSG00000154065	6391
+ENSG00000154146	1309
+ENSG00000154269	3885
+ENSG00000154608	4737
+ENSG00000154743	5667
+ENSG00000155100	3977
+ENSG00000155313	7823
+ENSG00000155621	4973
+ENSG00000155636	4784
+ENSG00000155729	4915
+ENSG00000156050	5056
+ENSG00000156831	2991
+ENSG00000156973	2850
+ENSG00000157107	6127
+ENSG00000157343	1418
+ENSG00000157423	23408
+ENSG00000157625	12711
+ENSG00000157637	7716
+ENSG00000157703	6302
+ENSG00000157823	9237
+ENSG00000157833	5939
+ENSG00000158092	3727
+ENSG00000158158	5105
+ENSG00000158296	6055
+ENSG00000158406	397
+ENSG00000158467	6858
+ENSG00000158470	4722
+ENSG00000158669	7939
+ENSG00000159173	7524
+ENSG00000159259	5118
+ENSG00000159346	3193
+ENSG00000159921	5477
+ENSG00000160051	2478
+ENSG00000160172	3402
+ENSG00000160199	7621
+ENSG00000160213	2490
+ENSG00000161040	4009
+ENSG00000161277	1825
+ENSG00000161328	2769
+ENSG00000161692	7146
+ENSG00000161888	2839
+ENSG00000162063	6309
+ENSG00000162227	3149
+ENSG00000162402	11708
+ENSG00000162444	861
+ENSG00000162526	2191
+ENSG00000162669	5827
+ENSG00000162819	4980
+ENSG00000162851	1848
+ENSG00000162980	6716
+ENSG00000163083	3204
+ENSG00000163156	3265
+ENSG00000163191	680
+ENSG00000163320	6490
+ENSG00000163322	7777
+ENSG00000163348	3630
+ENSG00000163376	4736
+ENSG00000163521	3411
+ENSG00000163577	5593
+ENSG00000163743	6565
+ENSG00000163832	3313
+ENSG00000163848	10574
+ENSG00000163900	8668
+ENSG00000163960	11291
+ENSG00000164073	5023
+ENSG00000164104	2775
+ENSG00000164144	4290
+ENSG00000164209	8283
+ENSG00000164241	7190
+ENSG00000164252	5051
+ENSG00000164308	7422
+ENSG00000164332	2676
+ENSG00000164418	7375
+ENSG00000164535	5756
+ENSG00000164597	8467
+ENSG00000164778	3395
+ENSG00000164902	4319
+ENSG00000164930	4065
+ENSG00000164934	9199
+ENSG00000164967	1092
+ENSG00000164985	6768
+ENSG00000164989	15144
+ENSG00000165030	2085
+ENSG00000165055	6749
+ENSG00000165275	2866
+ENSG00000165283	1806
+ENSG00000165410	7179
+ENSG00000165521	11000
+ENSG00000165629	2326
+ENSG00000165671	15242
+ENSG00000165816	3301
+ENSG00000165905	3115
+ENSG00000165959	15701
+ENSG00000165983	4003
+ENSG00000166068	8260
+ENSG00000166340	4901
+ENSG00000166343	2423
+ENSG00000166398	7459
+ENSG00000166788	3070
+ENSG00000166974	6180
+ENSG00000167195	2082
+ENSG00000167258	12773
+ENSG00000167614	7068
+ENSG00000167642	5199
+ENSG00000167645	3193
+ENSG00000167685	7239
+ENSG00000167785	8184
+ENSG00000167815	2093
+ENSG00000167842	3157
+ENSG00000167930	4699
+ENSG00000167977	3420
+ENSG00000168005	2807
+ENSG00000168078	2165
+ENSG00000168096	7868
+ENSG00000168159	3285
+ENSG00000168306	3859
+ENSG00000168495	5611
+ENSG00000168612	2769
+ENSG00000168661	2745
+ENSG00000168758	6157
+ENSG00000168826	2889
+ENSG00000168916	6922
+ENSG00000169116	5034
+ENSG00000169188	3245
+ENSG00000169302	7776
+ENSG00000169372	4276
+ENSG00000169519	5499
+ENSG00000169627	1365
+ENSG00000169660	4153
+ENSG00000169762	7828
+ENSG00000169946	7775
+ENSG00000169957	2395
+ENSG00000170011	5872
+ENSG00000170113	8700
+ENSG00000170142	3074
+ENSG00000170296	2266
+ENSG00000170340	2861
+ENSG00000170412	8005
+ENSG00000170619	2917
+ENSG00000170638	3153
+ENSG00000170889	4371
+ENSG00000170949	8511
+ENSG00000171055	8938
+ENSG00000171067	3406
+ENSG00000171163	3929
+ENSG00000171606	8333
+ENSG00000172009	7464
+ENSG00000172058	3285
+ENSG00000172086	1753
+ENSG00000172186	594
+ENSG00000172239	3391
+ENSG00000172315	3520
+ENSG00000172458	3327
+ENSG00000172746	497
+ENSG00000173153	3030
+ENSG00000173163	2392
+ENSG00000173366	3804
+ENSG00000173480	4204
+ENSG00000173674	4427
+ENSG00000173681	8630
+ENSG00000173715	3765
+ENSG00000173848	4175
+ENSG00000174442	4849
+ENSG00000174469	10864
+ENSG00000174483	6145
+ENSG00000174705	7977
+ENSG00000174799	8640
+ENSG00000174996	5479
+ENSG00000175220	6138
+ENSG00000175322	15787
+ENSG00000175390	9423
+ENSG00000176396	1981
+ENSG00000176407	7968
+ENSG00000176444	3670
+ENSG00000176593	8787
+ENSG00000176871	4616
+ENSG00000177239	8331
+ENSG00000177352	1781
+ENSG00000177788	1155
+ENSG00000177932	5411
+ENSG00000178184	4224
+ENSG00000178229	3659
+ENSG00000178233	5459
+ENSG00000178719	2527
+ENSG00000178935	2989
+ENSG00000178950	15155
+ENSG00000178996	5559
+ENSG00000179119	6005
+ENSG00000179163	2047
+ENSG00000179455	5731
+ENSG00000179918	2551
+ENSG00000179988	6118
+ENSG00000180178	8075
+ENSG00000180182	9656
+ENSG00000180818	2331
+ENSG00000180881	6689
+ENSG00000180884	4234
+ENSG00000180979	8114
+ENSG00000181220	8260
+ENSG00000181359	853
+ENSG00000181444	2734
+ENSG00000181472	3090
+ENSG00000181690	7833
+ENSG00000181915	3627
+ENSG00000182021	5046
+ENSG00000182134	5092
+ENSG00000182141	4956
+ENSG00000182324	4340
+ENSG00000182484	4618
+ENSG00000182810	2169
+ENSG00000182827	3578
+ENSG00000182841	5092
+ENSG00000182923	10116
+ENSG00000183022	643
+ENSG00000183171	2143
+ENSG00000183172	1731
+ENSG00000183309	7146
+ENSG00000183474	6989
+ENSG00000183506	7474
+ENSG00000183530	11809
+ENSG00000183569	2821
+ENSG00000184216	4001
+ENSG00000184319	4250
+ENSG00000184481	3447
+ENSG00000184731	3968
+ENSG00000184831	2353
+ENSG00000185189	4924
+ENSG00000185238	5467
+ENSG00000185482	2096
+ENSG00000185504	5354
+ENSG00000185670	3348
+ENSG00000185798	1797
+ENSG00000185885	1080
+ENSG00000186056	4024
+ENSG00000186106	5820
+ENSG00000186130	4106
+ENSG00000186532	6777
+ENSG00000186583	2007
+ENSG00000186743	740
+ENSG00000187144	4506
+ENSG00000187172	4080
+ENSG00000187186	1833
+ENSG00000187187	9007
+ENSG00000187210	6463
+ENSG00000187504	746
+ENSG00000187605	12271
+ENSG00000187994	4201
+ENSG00000188021	4230
+ENSG00000188039	8495
+ENSG00000188206	5007
+ENSG00000188295	1980
+ENSG00000188529	10277
+ENSG00000188549	7510
+ENSG00000188738	23761
+ENSG00000188786	8024
+ENSG00000188997	4164
+ENSG00000189143	4702
+ENSG00000189221	5528
+ENSG00000189343	883
+ENSG00000196074	6466
+ENSG00000196119	1146
+ENSG00000196123	4780
+ENSG00000196263	6783
+ENSG00000196305	7401
+ENSG00000196357	3953
+ENSG00000196419	3288
+ENSG00000196507	2305
+ENSG00000196549	9620
+ENSG00000196632	11472
+ENSG00000196696	6114
+ENSG00000196739	13988
+ENSG00000196741	1253
+ENSG00000196747	503
+ENSG00000196812	1328
+ENSG00000197045	7966
+ENSG00000197062	2933
+ENSG00000197121	12761
+ENSG00000197128	6096
+ENSG00000197535	12988
+ENSG00000197744	327
+ENSG00000197888	2077
+ENSG00000197961	7251
+ENSG00000198046	8810
+ENSG00000198155	2804
+ENSG00000198198	17331
+ENSG00000198431	6708
+ENSG00000198464	4836
+ENSG00000198482	5432
+ENSG00000198515	3581
+ENSG00000198520	3166
+ENSG00000198648	4247
+ENSG00000198746	2225
+ENSG00000198752	7958
+ENSG00000198865	3431
+ENSG00000198929	7771
+ENSG00000198961	5217
+ENSG00000203441	872
+ENSG00000203616	333
+ENSG00000203668	7078
+ENSG00000203837	2354
+ENSG00000203865	2458
+ENSG00000203950	1265
+ENSG00000203995	4694
+ENSG00000204253	881
+ENSG00000204334	601
+ENSG00000204348	2483
+ENSG00000204385	3369
+ENSG00000204392	1077
+ENSG00000204394	6081
+ENSG00000204396	3638
+ENSG00000204628	5990
+ENSG00000204805	342
+ENSG00000204859	4503
+ENSG00000204956	6252
+ENSG00000205084	6015
+ENSG00000205208	3730
+ENSG00000205268	7425
+ENSG00000205485	3110
+ENSG00000205560	4102
+ENSG00000205628	3484
+ENSG00000206192	2978
+ENSG00000206560	11279
+ENSG00000210100	69
+ENSG00000210174	65
+ENSG00000211584	4921
+ENSG00000213033	1212
+ENSG00000213062	911
+ENSG00000213066	15159
+ENSG00000213077	2281
+ENSG00000213148	464
+ENSG00000213174	414
+ENSG00000213197	694
+ENSG00000213318	783
+ENSG00000213339	3430
+ENSG00000213493	1451
+ENSG00000213588	3014
+ENSG00000213711	814
+ENSG00000213742	5308
+ENSG00000213760	2147
+ENSG00000213793	551
+ENSG00000213864	676
+ENSG00000213880	797
+ENSG00000213904	4208
+ENSG00000213906	3233
+ENSG00000213917	815
+ENSG00000213971	5091
+ENSG00000214029	15455
+ENSG00000214174	3858
+ENSG00000214389	784
+ENSG00000214617	4479
+ENSG00000214694	5490
+ENSG00000214810	311
+ENSG00000214961	1372
+ENSG00000214975	499
+ENSG00000215286	754
+ENSG00000215333	1283
+ENSG00000216854	553
+ENSG00000216915	1495
+ENSG00000217716	494
+ENSG00000217801	2171
+ENSG00000218965	609
+ENSG00000219553	723
+ENSG00000220131	354
+ENSG00000220157	961
+ENSG00000220483	871
+ENSG00000221843	6199
+ENSG00000221909	2717
+ENSG00000222046	1869
+ENSG00000223382	1326
+ENSG00000223620	1102
+ENSG00000223877	622
+ENSG00000224016	291
+ENSG00000224520	1447
+ENSG00000224578	1377
+ENSG00000224628	1519
+ENSG00000224664	316
+ENSG00000224892	997
+ENSG00000225405	390
+ENSG00000225544	392
+ENSG00000225787	306
+ENSG00000225806	1521
+ENSG00000226067	2075
+ENSG00000226086	822
+ENSG00000226114	361
+ENSG00000226144	454
+ENSG00000226232	1728
+ENSG00000226268	959
+ENSG00000226478	1126
+ENSG00000226703	812
+ENSG00000226752	7181
+ENSG00000226790	1139
+ENSG00000226833	1438
+ENSG00000227006	861
+ENSG00000227057	3115
+ENSG00000227343	600
+ENSG00000227376	552
+ENSG00000227401	284
+ENSG00000227543	3835
+ENSG00000227666	316
+ENSG00000227742	946
+ENSG00000227968	999
+ENSG00000228118	459
+ENSG00000228195	881
+ENSG00000228236	315
+ENSG00000228599	742
+ENSG00000228612	2737
+ENSG00000228981	843
+ENSG00000229044	439
+ENSG00000229344	682
+ENSG00000229503	477
+ENSG00000229956	6794
+ENSG00000230006	8042
+ENSG00000230022	634
+ENSG00000230074	665
+ENSG00000230118	258
+ENSG00000230146	1176
+ENSG00000230243	319
+ENSG00000230295	351
+ENSG00000230406	421
+ENSG00000230531	1798
+ENSG00000230551	8636
+ENSG00000230650	3130
+ENSG00000230667	909
+ENSG00000230863	742
+ENSG00000230869	2418
+ENSG00000230913	744
+ENSG00000231096	390
+ENSG00000231181	559
+ENSG00000231245	402
+ENSG00000231434	2167
+ENSG00000231615	1337
+ENSG00000231711	4947
+ENSG00000231955	1411
+ENSG00000232186	1228
+ENSG00000232581	357
+ENSG00000232676	1124
+ENSG00000232699	736
+ENSG00000232905	946
+ENSG00000232943	400
+ENSG00000233122	2436
+ENSG00000233454	275
+ENSG00000233503	1501
+ENSG00000233602	619
+ENSG00000233836	3242
+ENSG00000233846	487
+ENSG00000234231	2095
+ENSG00000234639	1239
+ENSG00000234722	3487
+ENSG00000234742	555
+ENSG00000234981	792
+ENSG00000235065	475
+ENSG00000235363	225
+ENSG00000235424	288
+ENSG00000235444	618
+ENSG00000235512	292
+ENSG00000235623	574
+ENSG00000235655	411
+ENSG00000235698	1200
+ENSG00000235750	4783
+ENSG00000235847	965
+ENSG00000235859	1234
+ENSG00000235892	1677
+ENSG00000236086	262
+ENSG00000236285	837
+ENSG00000236290	703
+ENSG00000236330	886
+ENSG00000236468	1335
+ENSG00000236570	1227
+ENSG00000236680	1238
+ENSG00000236681	523
+ENSG00000236735	375
+ENSG00000236739	535
+ENSG00000236753	2715
+ENSG00000236801	474
+ENSG00000236824	13458
+ENSG00000236946	1087
+ENSG00000237017	4158
+ENSG00000237033	609
+ENSG00000237054	3194
+ENSG00000237101	1323
+ENSG00000237357	2579
+ENSG00000237517	7448
+ENSG00000237939	652
+ENSG00000237977	563
+ENSG00000238221	500
+ENSG00000238251	514
+ENSG00000239377	420
+ENSG00000239524	400
+ENSG00000239569	736
+ENSG00000239791	1918
+ENSG00000239887	4495
+ENSG00000239926	747
+ENSG00000240005	589
+ENSG00000240392	575
+ENSG00000240418	893
+ENSG00000240540	1183
+ENSG00000240821	579
+ENSG00000241258	3540
+ENSG00000241370	1606
+ENSG00000241494	438
+ENSG00000241680	375
+ENSG00000241697	2611
+ENSG00000241772	1051
+ENSG00000241923	622
+ENSG00000242061	438
+ENSG00000242140	231
+ENSG00000242349	1427
+ENSG00000242600	2616
+ENSG00000242612	4046
+ENSG00000242858	602
+ENSG00000243122	413
+ENSG00000243396	402
+ENSG00000243701	4206
+ENSG00000243779	321
+ENSG00000244171	1291
+ENSG00000244270	403
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/go_terms.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,85 @@
+ENSG00000162526	GO:0000003
+ENSG00000162526	GO:0000166
+ENSG00000162526	GO:0000287
+ENSG00000162526	GO:0001882
+ENSG00000162526	GO:0001883
+ENSG00000162526	GO:0003674
+ENSG00000162526	GO:0003824
+ENSG00000162526	GO:0004672
+ENSG00000162526	GO:0004674
+ENSG00000162526	GO:0005488
+ENSG00000162526	GO:0005515
+ENSG00000162526	GO:0005524
+ENSG00000162526	GO:0005575
+ENSG00000162526	GO:0005622
+ENSG00000162526	GO:0005623
+ENSG00000162526	GO:0005737
+ENSG00000162526	GO:0006464
+ENSG00000162526	GO:0006468
+ENSG00000162526	GO:0006793
+ENSG00000162526	GO:0006796
+ENSG00000162526	GO:0007154
+ENSG00000162526	GO:0007165
+ENSG00000162526	GO:0007275
+ENSG00000162526	GO:0007276
+ENSG00000162526	GO:0007283
+ENSG00000162526	GO:0008150
+ENSG00000162526	GO:0008152
+ENSG00000162526	GO:0009987
+ENSG00000162526	GO:0016301
+ENSG00000162526	GO:0016310
+ENSG00000162526	GO:0016740
+ENSG00000162526	GO:0016772
+ENSG00000162526	GO:0016773
+ENSG00000162526	GO:0017076
+ENSG00000162526	GO:0019538
+ENSG00000162526	GO:0019953
+ENSG00000162526	GO:0022414
+ENSG00000162526	GO:0023052
+ENSG00000162526	GO:0030154
+ENSG00000162526	GO:0030554
+ENSG00000162526	GO:0032501
+ENSG00000162526	GO:0032502
+ENSG00000162526	GO:0032504
+ENSG00000162526	GO:0032549
+ENSG00000162526	GO:0032550
+ENSG00000162526	GO:0032553
+ENSG00000162526	GO:0032555
+ENSG00000162526	GO:0032559
+ENSG00000162526	GO:0035556
+ENSG00000162526	GO:0035639
+ENSG00000162526	GO:0036094
+ENSG00000162526	GO:0036211
+ENSG00000162526	GO:0043167
+ENSG00000162526	GO:0043168
+ENSG00000162526	GO:0043169
+ENSG00000162526	GO:0043170
+ENSG00000162526	GO:0043412
+ENSG00000162526	GO:0044237
+ENSG00000162526	GO:0044238
+ENSG00000162526	GO:0044260
+ENSG00000162526	GO:0044267
+ENSG00000162526	GO:0044424
+ENSG00000162526	GO:0044464
+ENSG00000162526	GO:0044699
+ENSG00000162526	GO:0044700
+ENSG00000162526	GO:0044702
+ENSG00000162526	GO:0044703
+ENSG00000162526	GO:0044707
+ENSG00000162526	GO:0044763
+ENSG00000162526	GO:0044767
+ENSG00000162526	GO:0046872
+ENSG00000162526	GO:0048232
+ENSG00000162526	GO:0048609
+ENSG00000162526	GO:0048869
+ENSG00000162526	GO:0050789
+ENSG00000162526	GO:0050794
+ENSG00000162526	GO:0050896
+ENSG00000162526	GO:0051704
+ENSG00000162526	GO:0051716
+ENSG00000162526	GO:0065007
+ENSG00000162526	GO:0071704
+ENSG00000162526	GO:0097159
+ENSG00000162526	GO:0097367
+ENSG00000162526	GO:1901265
+ENSG00000162526	GO:1901363
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.fasta	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,2 @@
+>1
+AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.gtf	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,6 @@
+1	ensembl_havana	gene	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
+1	ensembl_havana	transcript	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	exon	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	CDS	1	100	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	start_codon	1	3	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	stop_codon	101	103	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,1 @@
+ENSG00000162526	103
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/wal.tab	Thu Mar 03 09:56:51 2016 -0500
@@ -0,0 +1,3 @@
+category	over_represented_pvalue	under_represented_pvalue	numDEInCat	numInCat	term	ontology
+GO:0000278	0.01123506125343	0.999376653834006	4	5	mitotic cell cycle	BP
+GO:0000003	1	0.805913166914891	0	1	reproduction	BP