# HG changeset patch
# User fubar
# Date 1374890698 14400
# Node ID 91e77df18a7fb3af6c0597885f43c87a09c9c5c0
# Parent  6431647290573e50d1567c7a093e2d1680291a76
Uploaded
diff -r 643164729057 -r 91e77df18a7f rgedgeR/rgToolFactory.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgedgeR/rgToolFactory.py	Fri Jul 26 22:04:58 2013 -0400
@@ -0,0 +1,599 @@
+# rgToolFactory.py
+# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
+# 
+# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
+# 
+# all rights reserved
+# Licensed under the LGPL
+# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
+#
+# july 2013
+# added ability to combine images and individual log files into html output
+# just make sure there's a log file foo.log and it will be output
+# together with all images named like "foo_*.pdf"
+# otherwise old format for html
+#
+# January 2013
+# problem pointed out by Carlos Borroto
+# added escaping for <>$ - thought I did that ages ago...
+#
+# August 11 2012 
+# changed to use shell=False and cl as a sequence
+
+# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
+# It also serves as the wrapper for the new tool.
+# 
+# you paste and run your script
+# Only works for simple scripts that read one input from the history.
+# Optionally can write one new history dataset,
+# and optionally collect any number of outputs into links on an autogenerated HTML page.
+
+# DO NOT install on a public or important site - please.
+
+# installed generated tools are fine if the script is safe.
+# They just run normally and their user cannot do anything unusually insecure
+# but please, practice safe toolshed.
+# Read the fucking code before you install any tool 
+# especially this one
+
+# After you get the script working on some test data, you can
+# optionally generate a toolshed compatible gzip file
+# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
+# safe and largely automated installation in a production Galaxy.
+
+# If you opt for an HTML output, you get all the script outputs arranged
+# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
+# Ugly but really inexpensive.
+# 
+# Patches appreciated please. 
+#
+#
+# long route to June 2012 product
+# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
+# derived from an integrated script model  
+# called rgBaseScriptWrapper.py
+# Note to the unwary:
+#   This tool allows arbitrary scripting on your Galaxy as the Galaxy user
+#   There is nothing stopping a malicious user doing whatever they choose
+#   Extremely dangerous!!
+#   Totally insecure. So, trusted users only
+#
+# preferred model is a developer using their throw away workstation instance - ie a private site.
+# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
+#
+
+import sys 
+import shutil 
+import subprocess 
+import os 
+import time 
+import tempfile 
+import optparse
+import tarfile
+import re
+import shutil
+import math
+
# short name of this wrapper, used in messages and generated tool metadata
progname = os.path.split(sys.argv[0])[1] 
# version string reported for the generated tools
myversion = 'V000.2 June 2012' 
# module-level verbosity/debug switches (not settable from the command line)
verbose = False 
debug = False
# home of the tool factory project, linked from generated output
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
+
def timenow():
    """Return the current local time formatted as dd/mm/YYYY HH:MM:SS."""
    stamp = time.localtime(time.time())
    return time.strftime('%d/%m/%Y %H:%M:%S', stamp)
+
# Characters that must be escaped before the user's script is embedded in
# generated tool XML / HTML help. The original entity values were destroyed
# by HTML extraction (they read "&": "&" etc., making escaping a no-op);
# restored to real entities. '$' is backslash-escaped for Cheetah templating.
html_escape_table = {
     "&": "&amp;",
     ">": "&gt;",
     "<": "&lt;",
     "$": "\\$"
     }
+
def html_escape(text):
    """Return *text* with each character replaced by its entry in
    html_escape_table, unchanged characters passing through as-is."""
    escaped = [html_escape_table.get(ch, ch) for ch in text]
    return "".join(escaped)
+
def cmd_exists(cmd):
    """Return True when the shell can resolve *cmd* (binary, builtin or alias).

    Uses the shell 'type' builtin so aliases and builtins count as present.
    *cmd* is interpolated into a shell string - callers pass fixed internal
    names ('gm', 'convert', 'gs'), never user input.
    """
    rc = subprocess.call("type " + cmd, shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return rc == 0
+
+
+class ScriptRunner:
+    """class is a wrapper for an arbitrary script
+    """
+
+    def __init__(self,opts=None,treatbashSpecial=True):
+        """
+        cleanup inputs, setup some outputs
+        
+        """
+        self.useGM = cmd_exists('gm')
+        self.useIM = cmd_exists('convert')
+        self.useGS = cmd_exists('gs')
+        self.treatbashSpecial = treatbashSpecial
+        if opts.output_dir: # simplify for the tool tarball
+            os.chdir(opts.output_dir)
+        self.thumbformat = 'jpg'
+        self.opts = opts
+        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
+        self.toolid = self.toolname
+        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
+        self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
+        self.xmlfile = '%s.xml' % self.toolname
+        s = open(self.opts.script_path,'r').readlines()
+        s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
+        self.script = '\n'.join(s)
+        fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
+        tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
+        tscript.write(self.script)
+        tscript.close()
+        self.indentedScript = '\n'.join([' %s' % x for x in s]) # for restructured text in help
+        self.escapedScript = '\n'.join([html_escape(x) for x in s])
+        if opts.output_dir: # may not want these complexities 
+            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
+            art = '%s.%s' % (self.toolname,opts.interpreter)
+            artpath = os.path.join(self.opts.output_dir,art) # need full path
+            artifact = open(artpath,'w') # use self.sfile as script source for Popen
+            artifact.write(self.script)
+            artifact.close()
+        self.cl = []
+        self.html = []
+        a = self.cl.append
+        a(opts.interpreter)
+        if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
+            a(self.sfile)
+        else:
+            a('-') # stdin
+        a(opts.input_tab)
+        a(opts.output_tab)
+        self.outFormats = 'tabular' # TODO make this an option at tool generation time
+        self.inputFormats = 'tabular' # TODO make this an option at tool generation time
+        self.test1Input = '%s_test1_input.xls' % self.toolname
+        self.test1Output = '%s_test1_output.xls' % self.toolname
+        self.test1HTML = '%s_test1_output.html' % self.toolname
+
+    def makeXML(self):
+        """
+        Create a Galaxy xml tool wrapper for the new script as a string to write out
+        fixme - use templating or something less fugly than this example of what we produce
+
+        
+            a tabular file 
+            
+            reverse.py --script_path "$runMe" --interpreter "python" 
+            --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file" 
+             
+            
+             
+            
+             
+            
+            
+**What it Does**
+
+Reverse the columns in a tabular file
+
+             
+            
+            
+            
+# reverse order of columns in a tabular file
+import sys
+inp = sys.argv[1]
+outp = sys.argv[2]
+i = open(inp,'r')
+o = open(outp,'w')
+for row in i:
+     rs = row.rstrip().split('\t')
+     rs.reverse()
+     o.write('\t'.join(rs))
+     o.write('\n')
+i.close()
+o.close()
+ 
+
+             
+             
+             
+        
+        """ 
+        newXML="""
+            %(tooldesc)s
+            %(command)s
+            
+            %(inputs)s
+             
+            
+            %(outputs)s
+             
+            
+            
+            %(script)s
+             
+             
+            %(tooltests)s
+            
+            %(help)s
+             
+             """ # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
+               
+        newCommand="""
+            %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" 
+            --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s 
+             """ # may NOT be an input or htmlout
+        tooltestsTabOnly = """
+         
+         
+         %s ' % self.opts.tool_desc
+        else:
+            xdict['tooldesc'] = ''
+        xdict['command_outputs'] = '' 
+        xdict['outputs'] = '' 
+        if self.opts.input_tab <> 'None':
+            xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something
+            xdict['inputs'] = '
  
+        """ 
+        galhtmlattr = """
""" 
+        galhtmlpostfix = """
Galaxy Tool "%s" run at %s
%s %s %s %s %s images and outputs
' % sectionname)
+                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)\n')
+                    for i,paths in enumerate(ourpdfs): 
+                        fname,thumb = paths
+                        s= """ \n'
+                            ntogo = 0
+                            if i < (npdf - 1): # more to come
+                               s += ''
+                               ntogo = nacross
+                        else:
+                            ntogo -= 1
+                        html.append(s)
+                    if html[-1].strip().endswith(' '):
+                        html.append('
  '*ntogo)
+                        html.append('\n')
+                logt = open(logfname,'r').readlines()
+                logtext = [x for x in logt if x.strip() > '']
+                html.append('%s log output
' % sectionname)
+                if len(logtext) > 1:
+                    html.append('\n\n')
+                    html += logtext
+                    html.append('\n \n')
+                else:
+                    html.append('%s is emptyOutput File Name (click to view) Size 
All output files available for downloading
\n')
+           html += fhtml # add all non-pdf files to the end of the display
+        else:
+            html.append('### Error - %s returned no files - please confirm that parameters are sane
' % self.opts.interpreter)
+        html.append(galhtmlpostfix)
+        htmlf = file(self.opts.output_html,'w')
+        htmlf.write('\n'.join(html))
+        htmlf.write('\n')
+        htmlf.close()
+        self.html = html
+
+
+    def run(self):
+        """
+        scripts must be small enough not to fill the pipe!
+        """
+        if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
+          retval = self.runBash()
+        else:
+            if self.opts.output_dir:
+                sto = open(self.tlog,'w')
+                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
+                sto.flush()
+                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+            else:
+                p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
+            p.stdin.write(self.script)
+            p.stdin.close()
+            retval = p.wait()
+            if self.opts.output_dir:
+                sto.close()
+            if self.opts.make_HTML:
+                self.makeHtml()
+        return retval
+
    def runBash(self):
        """
        Run the saved script file directly under bash/sh.

        bash will not take its program on stdin the way the other
        interpreters do, so the tempfile copy (self.sfile, already placed
        on the command line by __init__) is executed instead.
        Returns the child exit code.
        """
        if self.opts.output_dir:
            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
            # child stdout and stderr are both captured into the run log
            sto = open(self.tlog,'w')
            sto.write(s)
            # flush so the header line precedes any child output
            sto.flush()
            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
        else:
            p = subprocess.Popen(self.cl,shell=False)            
        retval = p.wait()
        if self.opts.output_dir:
            sto.close()
        if self.opts.make_HTML:
            # collate pdfs and per-section logs into the html output
            self.makeHtml()
        return retval
+  
+
def main():
    """
    Command-line entry point for the Galaxy tool factory wrapper.

    Parses the Galaxy-supplied options, refuses unauthorised users
    (--bad_user is set by the tool XML when the caller is not an admin),
    runs or packages the supplied script via ScriptRunner, removes the
    temporary script copy and exits non-zero on child failure.
    """
    u = """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
     
    """
    op = optparse.OptionParser()
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default=None)
    a('--output_html',default=None)
    a('--input_tab',default="None")
    a('--output_tab',default="None")
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_Tool',default=None)
    a('--make_HTML',default=None)
    a('--help_text',default=None)
    a('--tool_desc',default=None)
    a('--new_tool',default=None)
    a('--tool_version',default=None)
    opts, args = op.parse_args()
    assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user)
    assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq'
    assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript'
    assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError: # already exists (or unwritable - later opens will fail loudly)
            pass
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    os.unlink(r.sfile) # remove the tempfile copy of the script
    if retcode:
        sys.exit(retcode) # indicate failure to job runner
+
+
diff -r 643164729057 -r 91e77df18a7f rgedgeR/rgedgeRpaired.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgedgeR/rgedgeRpaired.xml	Fri Jul 26 22:04:58 2013 -0400
@@ -0,0 +1,918 @@
+
+  models using BioConductor packages 
+  
+      biocbasics 
+      package_r3 
+   
+  
+  
+     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" 
+    --output_dir "$html_file.files_path" --output_html "$html_file" --output_tab "$outtab" --make_HTML "yes"
+   
+  
+    
+         
+    
+    
+         
+    
+    
+    Do not run edgeR 
+      Run edgeR 
+     
+     
+       
+       
+     
+    
+    Do not run DESeq2 
+      Run DESeq2 (only works if NO second GLM factor supplied at present) 
+     
+     
+         Parametric (default) fit for dispersions 
+            Local fit - use this if parametric fails 
+            Mean dispersion fit- use this if you really understand what you're doing - read the fine manual 
+          
+      
+       
+     
+    Do not run VOOM 
+      Run VOOM 
+     
+    
+    Do not run GSEA tests with the Camera algorithm 
+    Run GSEA tests with the Camera algorithm 
+    
+     
+     
+      Use a built-in gene set 
+        Use a gene set from my history 
+        Add a gene set from my history to a built in gene set 
+      
+      
+        
+             
+        
+       
+      
+         
+      
+        
+             
+        
+        
+      
+      
+      
+      
+     
+    fdr 
+            Benjamini Hochberg 
+            Benjamini Yukateli 
+            Bonferroni 
+            Hochberg 
+            Holm 
+            Hommel 
+            no control for multiple tests 
+    
+   
+  
+     
+ 
+      
+ 
+
+ 
+ 
+
+
+
+ nsamp) {
+      dm =dm[1:nsamp,]
+      #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
+    }
+    newcolnames = substr(colnames(dm),1,20)
+    colnames(dm) = newcolnames
+    pdf(outpdfname)
+    heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
+         Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
+    dev.off()
+}
+
hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
{
    # Draw a heatmap of at most nsamp contig rows to a pdf, colouring the
    # column side bar by group membership with a rainbow palette.
    # NOTE(review): nmeans and TName are currently unused - kept for call
    # compatibility; assumes rows are pre-ranked so head(nsamp) keeps the
    # most interesting contigs - confirm against callers.
    # for 2 groups only was
    #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
    #pcols = unlist(lapply(group,col.map))
    gu = unique(group)
    colours = rainbow(length(gu),start=0.3,end=0.6)
    pcols = colours[match(group,gu)]   # one colour per group level
    nrows = nrow(cmat)
    mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
    if (nrows > nsamp)  {
               cmat = cmat[c(1:nsamp),]
               mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
          }
    newcolnames = substr(colnames(cmat),1,20)   # truncate long sample names
    colnames(cmat) = newcolnames
    pdf(outpdfname)
    heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
    dev.off()
}
+
qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
# stolen from https://gist.github.com/703512
# Write a -log10 p-value QQ plot of pvector to outpdf; extra arguments are
# passed through to plot().
# NOTE(review): relies on globals 'reallysmall' and 'reallybig' (defined
# elsewhere in this script, outside this view) to clamp infinite values
# after the log transform - confirm they are set before first call.
{
    o = -log10(sort(pvector,decreasing=F))   # observed quantiles
    e = -log10( 1:length(o)/length(o) )      # expected uniform quantiles
    o[o==-Inf] = reallysmall
    o[o==Inf] = reallybig
    maint = descr
    pdf(outpdf)
    plot(e,o,pch=19,cex=1, main=maint, ...,
        xlab=expression(Expected~~-log[10](italic(p))),
        ylab=expression(Observed~~-log[10](italic(p))),
        xlim=c(0,max(e)), ylim=c(0,max(o)))
    lines(e,e,col="red")                     # identity reference line
    grid(col = "lightgray", lty = "dotted")
    dev.off()
}
+
smearPlot = function(DGEList,deTags, outSmear, outMain)
        {
        # Write an edgeR smear (MA-style) plot to pdf, highlighting the
        # differentially expressed tags listed in deTags.
        pdf(outSmear)
        plotSmear(DGEList,de.tags=deTags,main=outMain)
        grid(col="lightgray", lty="dotted")
        dev.off()
        }
+        
boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
{   # 
        # Side-by-side boxplots of raw vs normalised log counts (to
        # pdfname), then a square grid of per-sample count histograms
        # written to sample_counts_histogram.pdf.
        # NOTE(review): '\$' below is a Galaxy/Cheetah-escaped '$' - this R
        # code is embedded in tool XML and unescaped before Rscript runs it.
        nc = ncol(rawrs)
        for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}   # mask negative entries
        fullnames = colnames(rawrs)
        newcolnames = substr(colnames(rawrs),1,20)   # shorten names for axis labels
        colnames(rawrs) = newcolnames
        newcolnames = substr(colnames(cleanrs),1,20)
        colnames(cleanrs) = newcolnames
        defpar = par(no.readonly=T)   # save graphics state to restore at the end
        print.noquote('raw contig counts by sample:')
        print.noquote(summary(rawrs))
        print.noquote('normalised contig counts by sample:')
        print.noquote(summary(cleanrs))
        pdf(pdfname)
        par(mfrow=c(1,2))
        boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
        grid(col="lightgray",lty="dotted")
        boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
        grid(col="lightgray",lty="dotted")
        dev.off()
        pdfname = "sample_counts_histogram.pdf" 
        nc = ncol(rawrs)
        print.noquote(paste('Using ncol rawrs=',nc))
        ncroot = round(sqrt(nc))
        # smallest square grid with at least nc panels
        if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
        m = c()
        for (i in c(1:nc)) {
              rhist = hist(rawrs[,i],breaks=100,plot=F)
              m = append(m,max(rhist\$counts))
             }
        ymax = max(m)   # shared y limit so panels are comparable
        pdf(pdfname)
        par(mfrow=c(ncroot,ncroot))
        for (i in c(1:nc)) {
                 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", 
                 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
             }
        dev.off()
        par(defpar)
      
}
+
cumPlot = function(rawrs,cleanrs,maint,myTitle)
{   # updated to use ecdf
        # Two stacked histograms of log10 row sums - before and after
        # filtering - written to Filtering_rowsum_bar_charts.pdf.
        pdfname = "Filtering_rowsum_bar_charts.pdf"
        defpar = par(no.readonly=T)   # save graphics state
        lrs = log(rawrs,10) 
        lim = max(lrs)                # shared x limit so both panels align
        pdf(pdfname)
        par(mfrow=c(2,1))
        hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
             ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
        grid(col="lightgray", lty="dotted")
        lrs = log(cleanrs,10) 
        hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
             ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
        grid(col="lightgray", lty="dotted")
        dev.off()
        par(defpar)
}
+
cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
{   # updated to use ecdf
        # Cumulative (ecdf) curves of contig total reads before and after
        # filtering, on a log x axis; pdf name is derived from myTitle.
        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
        pdf(pdfname)
        par(mfrow=c(2,1))
        lastx = max(rawrs)            # shared x limit for both panels
        rawe = knots(ecdf(rawrs))
        cleane = knots(ecdf(cleanrs))
        cy = 1:length(cleane)/length(cleane)   # cumulative proportions
        ry = 1:length(rawe)/length(rawe)
        plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
        grid(col="blue")
        plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
        grid(col="blue")
        dev.off()
}
+
+
+
doGSEA = function(y=NULL,design=NULL,histgmt="",
                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
{
  # Run limma/edgeR camera() competitive gene set tests for every set in
  # the supplied gmt file(s); write camera_up_*/camera_down_* tables and
  # log console output to Camera.log.
  # NOTE(review): '\$' is Galaxy/Cheetah-escaped '$'; the ntest parameter
  # is overwritten below with the actual number of gene sets read.
  sink('Camera.log')    # capture all console output until sink() at the end
  genesets = c()
  if (bigmt > "")       # non-empty built-in gmt path supplied
  {
    bigenesets = readLines(bigmt)
    genesets = bigenesets
  }
  if (histgmt > "")     # gmt from the user's history, optionally appended
  {
    hgenesets = readLines(histgmt)
    if (bigmt > "") {
      genesets = rbind(genesets,hgenesets)
    } else {
      genesets = hgenesets
    } # use only history if no bi
  }
  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
  outf = outfname
  head=paste(myTitle,'edgeR GSEA')
  write(head,file=outfname,append=F)
  ntest=length(genesets)    # overrides the ntest parameter
  urownames = toupper(rownames(y))
  upcam = c()
  downcam = c()
  for (i in 1:ntest) {
    gs = unlist(genesets[i])
    g = gs[1] # geneset_id
    u = gs[2]
    if (u > "") { u = paste("",u," ",sep="") }
    glist = gs[3:length(gs)] # member gene symbols
    glist = toupper(glist)
    inglist = urownames %in% glist   # logical index for camera()
    nin = sum(inglist)
    # skip sets that are too small or too large to be informative
    if ((nin > minnin) && (nin < maxnin)) {
      ### print(paste('@@found',sum(inglist),'genes in glist'))
      camres = camera(y=y,index=inglist,design=design)
      if (! is.null(camres)) {
      rownames(camres) = g # gene set name
      camres = cbind(GeneSet=g,URL=u,camres)
      if (camres\$Direction == "Up") 
        {
        upcam = rbind(upcam,camres) } else {
          downcam = rbind(downcam,camres)
        }
      }
   }
  }
  uscam = upcam[order(upcam\$PValue),]     # sort ascending by raw p
  unadjp = uscam\$PValue
  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))     # report at least 10
  dscam = downcam[order(downcam\$PValue),]
  unadjp = dscam\$PValue
  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
  sink()    # restore console output
}
+
+
+
+edgeIt = function (Count_Matrix,group,outputfilename,fdrtype='fdr',priordf=5, 
+        fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
+        filterquantile=0.2, subjects=c(),mydesign=NULL,
+        doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
+        histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
+        doCook=F,DESeq_fitType="parameteric") 
+{
+  # Error handling
+  if (length(unique(group))!=2){
+    print("Number of conditions identified in experiment does not equal 2")
+    q()
+    }
+  require(edgeR)
+  options(width = 512) 
+  mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
+  allN = nrow(Count_Matrix)
+  nscut = round(ncol(Count_Matrix)/2)
+  colTotmillionreads = colSums(Count_Matrix)/1e6
+  counts.dataframe = as.data.frame(c()) 
+  rawrs = rowSums(Count_Matrix)
+  nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
+  nzN = nrow(nonzerod)
+  nzrs = rowSums(nonzerod)
+  zN = allN - nzN
+  print('# Quantiles for non-zero row counts:',quote=F)
+  print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
+  if (useNDF == "T")
+  {
+    gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
+    lo = colSums(Count_Matrix[!gt1rpin3,])
+    workCM = Count_Matrix[gt1rpin3,]
+    cleanrs = rowSums(workCM)
+    cleanN = length(cleanrs)
+    meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
+    print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
+    maint = paste('Filter >=1/million reads in >=',nscut,'samples')
+  }   else {        
+    useme = (nzrs > quantile(nzrs,filterquantile))
+    workCM = nonzerod[useme,]
+    lo = colSums(nonzerod[!useme,])
+    cleanrs = rowSums(workCM)
+    cleanN = length(cleanrs)
+    meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
+    print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
+    maint = paste('Filter below',filterquantile,'quantile')
+  }
+  cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
+  allgenes = rownames(workCM)
+  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
+  genecards=" 0.8) # is ucsc style string
+  {
+    print("@@ using ucsc substitution for urls")
+    contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes," ")
+  } else {
+    print("@@ using genecards substitution for urls")
+    contigurls = paste0(genecards,allgenes,"\'>",allgenes,"")
+  }
+  print.noquote("# urls")
+  print.noquote(head(contigurls))
+  print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) 
+  cmrowsums = rowSums(workCM)
+  TName=unique(group)[1]
+  CName=unique(group)[2]
+  if (is.null(mydesign)) {
+    if (length(subjects) == 0) 
+    {
+      mydesign = model.matrix(~group)
+    } 
+    else { 
+      subjf = factor(subjects)
+      mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
+    }
+  } 
+  print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
+  print.noquote('Using design matrix:')
+  print.noquote(mydesign)
+  if (doedgeR) {
+  sink('edgeR.log')
+  #### Setup DGEList object
+  DGEList = DGEList(counts=workCM, group = group)
+  DGEList = calcNormFactors(DGEList)
+
+  DGEList = estimateGLMCommonDisp(DGEList,mydesign)
+  comdisp = DGEList\$common.dispersion
+  DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
+  if (edgeR_priordf > 0) {
+    print.noquote(paste("prior.df =",edgeR_priordf))
+    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
+  } else {
+    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
+  }
+  DGLM = glmFit(DGEList,design=mydesign)
+  DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
+  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
+  normData = (1e+06*DGEList\$counts/efflib)
+  uoutput = cbind( 
+    Name=as.character(rownames(DGEList\$counts)),
+    DE\$table,
+    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
+    DGEList\$counts
+  )
+  soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
+  goodness = gof(DGLM, pcutoff=fdrthresh)
+  if (sum(goodness\$outlier) > 0) {
+    print.noquote('GLM outliers:')
+    print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
+    } else { 
+      print('No GLM fit outlier genes found\n')
+    }
+  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
+  pdf("edgeR_GoodnessofFit.pdf")
+  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
+  abline(0,1,lwd=3)
+  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
+  dev.off()
+  estpriorn = getPriorN(DGEList)
+  print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
+  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
+  normData = (1e+06*DGEList\$counts/efflib)
+  uniqueg = unique(group)
+  #### Plot MDS
+  sample_colors =  match(group,levels(group))
+  sampleTypes = levels(factor(group))
+  print.noquote(sampleTypes)
+  pdf("edgeR_MDSplot.pdf")
+  plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
+  legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
+  grid(col="blue")
+  dev.off()
+  colnames(normData) = paste( colnames(normData),'N',sep="_")
+  print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
+  nzd = data.frame(log(nonzerod + 1e-2,10))
+  boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf")
+  write.table(soutput,outputfilename, quote=FALSE, sep="\t",row.names=F)
+  tt = cbind( 
+    Name=as.character(rownames(DGEList\$counts)),
+    DE\$table,
+    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
+  )
+  print.noquote("# edgeR Top tags\n")
+  tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
+  tt = tt[order(DE\$table\$PValue),] 
+  print.noquote(tt[1:50,])
+  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
+  nsig = length(deTags)
+  print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
+  deColours = ifelse(deTags,'red','black')
+  pdf("edgeR_BCV_vs_abundance.pdf")
+  plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
+  dev.off()
+  dg = DGEList[order(DE\$table\$PValue),]
+  #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
+  efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
+  normData = (1e+06*dg\$counts/efflib)
+  outpdfname="edgeR_heatmap.pdf"
+  hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=myTitle)
+  outSmear = "edgeR_smearplot.pdf"
+  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
+  smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
+  qqPlot(descr=paste(myTitle,'edgeR QQ plot'),pvector=DE\$table\$PValue,outpdf='edgeR_qqplot.pdf')
+  norm.factor = DGEList\$samples\$norm.factors
+  topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
+  edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
+  edgeRcounts = rep(0, length(allgenes))
+  edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
+  sink()
+  } ### doedgeR
+  if (doDESeq2 == T)
+  {
+    sink("DESeq2.log")
+    # DESeq2
+    require('DESeq2')
+    library('RColorBrewer')
+    pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))        
+    deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
+    #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
+    #rDESeq = results(DESeq2)
+    #newCountDataSet(workCM, group)
+    deSeqDatsizefac = estimateSizeFactors(deSEQds)
+    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
+    resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
+    rDESeq = as.data.frame(results(resDESeq))
+    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
+    srDESeq = rDESeq[order(rDESeq\$pvalue),]
+    qqPlot(descr=paste(myTitle,'DESeq2 qqplot'),pvector=rDESeq\$pvalue,outpdf='DESeq2_qqplot.pdf')
+    cat("# DESeq top 50\n")
+    print.noquote(srDESeq[1:50,])
+    write.table(srDESeq,paste(mt,'DESeq2_TopTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F)
+    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
+    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
+    DESeqcounts = rep(0, length(allgenes))
+    DESeqcounts[DESeqcountsindex] = 1
+    pdf("DESeq2_dispersion_estimates.pdf")
+    plotDispEsts(resDESeq)
+    dev.off()
+    ysmall = abs(min(rDESeq\$log2FoldChange))
+    ybig = abs(max(rDESeq\$log2FoldChange))
+    ylimit = min(4,ysmall,ybig)
+    pdf("DESeq2_MA_plot.pdf")
+    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
+    dev.off()
+    rlogres = rlogTransformation(resDESeq)
+    sampledists = dist( t( assay(rlogres) ) )
+    sdmat = as.matrix(sampledists)
+    pdf("DESeq2_sample_distance_plot.pdf")
+    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
+         col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
+    dev.off()
+    sink()
+    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
+    if ("try-error" %in% class(result)) {
+         print.noquote('DESeq2 plotPCA failed.')
+         } else {
+         pdf("DESeq2_PCA_plot.pdf")
+         #### wtf - print? Seems needed to get this to work
+         print(ppca)
+         dev.off()
+        }
+  }
+
+  if (doVoom == T) {
+      sink('VOOM.log')
+      pdf("VOOM_mean_variance_plot.pdf")
+      dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
+      dev.off()
+      # Use limma to fit data
+      fit = lmFit(dat.voomed, mydesign)
+      fit = eBayes(fit)
+      rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
+      qqPlot(descr=paste(myTitle,'VOOM-limma QQ plot'),pvector=rvoom\$P.Value,outpdf='VOOM_qqplot.pdf')
+      rownames(rvoom) = rownames(workCM)
+      rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
+      srvoom = rvoom[order(rvoom\$P.Value),]
+      write.table(srvoom,paste(mt,'VOOM_topTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F)
+      # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
+      topresults.voom = srvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
+      voomcountsindex = which(allgenes %in% topresults.voom\$ID)
+      voomcounts = rep(0, length(allgenes))
+      voomcounts[voomcountsindex] = 1
+      cat("# VOOM top 50\n")
+      print(srvoom[1:50,])
+      sink()
+  }
+
+  if (doCamera) {
+  doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
+    outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
+  }
+
+  if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
+    if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
+        vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
+        counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, 
+                                       VOOM_limma = voomcounts, row.names = allgenes)
+       } else if ((doDESeq2==T) && (doedgeR==T))  {
+         vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
+         counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
+       } else if ((doVoom==T) && (doedgeR==T)) {
+        vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
+        counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
+       }
+    
+    if (nrow(counts.dataframe > 1)) {
+      counts.venn = vennCounts(counts.dataframe)
+      vennf = "Venn_significant_genes_overlap.pdf" 
+      pdf(vennf)
+      vennDiagram(counts.venn,main=vennmain,col="maroon")
+      dev.off()
+    }
+  } #### doDESeq2 or doVoom
+  #Return our main table
+  uoutput 
+  
+}
+#### Done
+
+###sink(stdout(),append=T,type="message")
+builtin_gmt=""
+history_gmt=""
+doDESeq2 = $DESeq2.doDESeq2 # make these T or F
+doVoom = $doVoom
+doCamera = $camera.doCamera
+doedgeR = $edgeR.doedgeR
+edgeR_priordf = 0
+DESeq_fitType = "parametric" # DESeq2 package default; avoids an unbound name in the edgeIt call when DESeq2 is disabled
+#if $DESeq2.doDESeq2 == "T"
+  DESeq_fitType = "$DESeq2.DESeq_fitType"
+#end if
+#if $edgeR.doedgeR == "T"
+edgeR_priordf = $edgeR.edgeR_priordf
+#end if
+
+#if $camera.doCamera == 'T'
+#if $camera.gmtSource.refgmtSource == "indexed" or $camera.gmtSource.refgmtSource == "both":
+  builtin_gmt = "${camera.gmtSource.builtinGMT.fields.path}"
+#end if
+#if $camera.gmtSource.refgmtSource == "history" or $camera.gmtSource.refgmtSource == "both":
+    history_gmt = "${camera.gmtSource.ownGMT}"
+    history_gmt_name = "${camera.gmtSource.ownGMT.name}"
+  #end if
+#end if
+
+
+
+Out_Dir = "$html_file.files_path"
+Input =  "$input1"
+TreatmentName = "$treatment_name"
+TreatmentCols = "$Treat_cols"
+ControlName = "$control_name"
+ControlCols= "$Control_cols"
+outputfilename = "$outtab"
+org = "$input1.dbkey"
+if (org == "") { org = "hg19"} # fall back to a default build when the input dataset has no dbkey
+fdrtype = "$fdrtype"
+fdrthresh = $fdrthresh
+useNDF = "$useNDF"
+fQ = $fQ # non-differential centile cutoff
+myTitle = "$title"
+subjects = c($subjectids) # empty vector means all samples independent; otherwise one integer per sample for a paired/blocked design
+nsubj = length(subjects)
+if (nsubj > 0) {
+if (doDESeq2) {
+ print('WARNING - cannot yet use DESeq2 for 2 way anova - see the docs')
+ doDESeq2 = F
+ }
+}
+TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
+CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 
+cat('Got TCols=')
+cat(TCols)
+cat('; CCols=')
+cat(CCols)
+cat('\n')
+useCols = c(TCols,CCols)
+if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
+Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
+snames = colnames(Count_Matrix)
+nsamples = length(snames)
+if (nsubj >  0 & nsubj != nsamples) {
+options("show.error.messages"=T)
+mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
+   'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
+write(mess, stderr())
+quit(save="no",status=4)
+}
+
+Count_Matrix = Count_Matrix[,useCols] ### reorder columns
+if (length(subjects) != 0) {subjects = subjects[useCols]} # keep subject ids aligned with the reordered columns
+rn = rownames(Count_Matrix)
+islib = rn %in% c('librarySize','NotInBedRegions')
+LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
+Count_Matrix = Count_Matrix[subset(rn,! islib),]
+group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             #Build a group descriptor
+group = factor(group, levels=c(ControlName,TreatmentName))
+colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")                   #Relable columns
+results = edgeIt(Count_Matrix=Count_Matrix,group=group,outputfilename=outputfilename,
+                 fdrtype=fdrtype,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
+                 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
+                 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
+                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
+sessionInfo()
+]]>
+ 
+ 
+
+
+----
+
+**What it does**
+
+Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
+Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
+
+**Input**
+
+Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
+and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the 
+non-negative integer count of reads from one sample overlapping the feature. 
+The matrix must have a header row uniquely identifying the source samples, and unique row names in 
+the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
+
+**Specifying comparisons**
+
+This is basically dumbed down for two factors - case vs control.
+
+More complex interfaces are possible but painful at present. 
+Probably need to specify a phenotype file to do this better.
+Work in progress. Send code.
+
+If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
+supply a comma separated list of integer indicators, one for every sample (whether modelled or not!), identifying (eg) the subject each sample came from -
+a list of integers, one for each sample, or an empty string if samples are all independent.
+If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
+Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
+
+So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones.
+For example, if you had 6 samples with the first two independent and the remaining four forming two pairs, each pair from a different subject, you might use
+8,9,1,1,2,2 
+as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6.
+
+**Methods available**
+
+You can run 3 popular Bioconductor packages available for count data.
+
+edgeR - see edgeR_ for details
+
+VOOM/limma - see limma_VOOM_ for details
+
+DESeq2 - see DESeq2_ for details
+
+and optionally camera in edgeR which works better if MSigDB is installed.
+
+**Outputs**
+
+Some helpful plots and analysis results. Note that most of these are produced using R code 
+suggested by the excellent documentation and vignettes for the Bioconductor
+packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
+
+***old rant on changes to Bioconductor package variable names between versions***
+
+The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) 
+breaking this and all other code that assumed the old name for this variable, 
+between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). 
+This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing 
+to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
+when their old scripts break. This tool currently works with edgeR 2.4.6.
+
+**Note on prior.N**
+
+http://seqanswers.com/forums/showthread.php?t=5591 says:
+
+*prior.n*
+
+The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. 
+You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood 
+in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your 
+tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the 
+common likelihood the weight of one observation.
+
+In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, 
+or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that 
+you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation 
+(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? 
+What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. 
+If you have more samples, then the tagwise dispersion estimates will be more reliable, 
+so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. 
+
+
+From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
+
+Dear Dorota,
+
+The important settings are prior.df and trend.
+
+prior.n and prior.df are related through prior.df = prior.n * residual.df,
+and your experiment has residual.df = 36 - 12 = 24.  So the old setting of
+prior.n=10 is equivalent for your data to prior.df = 240, a very large
+value.  Going the other way, the new setting of prior.df=10 is equivalent
+to prior.n=10/24.
+
+To recover old results with the current software you would use
+
+  estimateTagwiseDisp(object, prior.df=240, trend="none")
+
+To get the new default from old software you would use
+
+  estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
+
+Actually the old trend method is equivalent to trend="loess" in the new
+software. You should use plotBCV(object) to see whether a trend is
+required.
+
+Note you could also use
+
+  prior.n = getPriorN(object, prior.df=10)
+
+to map between prior.df and prior.n.
+
+----
+
+**Attributions**
+
+edgeR - edgeR_ 
+
+VOOM/limma - limma_VOOM_ 
+
+DESeq2 - DESeq2_
+
+See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
+
+Galaxy_ (that's what you are using right now!) for gluing everything together 
+
+Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
+licensed to you under the LGPL_ like other rgenetics artefacts
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+.. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
+.. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
+.. _Galaxy: http://getgalaxy.org
+ 
+
+ 
+
+
diff -r 643164729057 -r 91e77df18a7f rgedgeR/test-data/edgeRtest1out.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgedgeR/test-data/edgeRtest1out.html	Fri Jul 26 22:04:58 2013 -0400
@@ -0,0 +1,813 @@
+ 
+         
+          
+        
+
Galaxy Tool "edgeR" run at 12/06/2013 16:42:51
+
+
+ 
+
+ 
+
+ 
+
+    
+ 
+
+
+
Rscript log
+
+## Toolfactory generated command line = Rscript - None /data/home/rlazarus/galaxy-central/database/files/000/dataset_2.dat
+
+Loading required package: gtools
+
+Loading required package: gdata
+
+gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
+
+gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
+
+Attaching package: ‘gdata’
+
+The following object(s) are masked from ‘package:stats’:
+
+    nobs
+
+The following object(s) are masked from ‘package:utils’:
+
+    object.size
+
+Loading required package: caTools
+
+Loading required package: grid
+
+Loading required package: KernSmooth
+
+KernSmooth 2.23 loaded
+
+Copyright M. P. Wand 1997-2009
+
+Loading required package: MASS
+
+Attaching package: ‘gplots’
+
+The following object(s) are masked from ‘package:stats’:
+
+    lowess
+
+Loading required package: Biobase
+
+Loading required package: BiocGenerics
+
+Loading required package: methods
+
+Attaching package: ‘BiocGenerics’
+
+The following object(s) are masked from ‘package:gdata’:
+
+    combine
+
+The following object(s) are masked from ‘package:stats’:
+
+    xtabs
+
+The following object(s) are masked from ‘package:base’:
+
+    anyDuplicated, cbind, colnames, duplicated, eval, Filter, Find,
+
+    get, intersect, lapply, Map, mapply, mget, order, paste, pmax,
+
+    pmax.int, pmin, pmin.int, Position, rbind, Reduce, rep.int,
+
+    rownames, sapply, setdiff, table, tapply, union, unique
+
+Welcome to Bioconductor
+
+    Vignettes contain introductory material; view with
+
+    'browseVignettes()'. To cite Bioconductor, see
+
+    'citation("Biobase")', and for packages 'citation("pkgname")'.
+
+Loading required package: locfit
+
+locfit 1.5-9.1 	 2013-03-22
+
+Loading required package: lattice
+
+Warning message:
+
+found methods to import for function ‘eapply’ but not the generic itself 
+
+Loading required package: limma
+
+Attaching package: ‘limma’
+
+The following object(s) are masked from ‘package:DESeq’:
+
+    plotMA
+
+# got TCols=2 3 4 8; CCols=1 5 6 7
+
+[1] # Quantiles for non-zero row counts:
+
+       0%       10%       20%       30%       40%       50%       60%       70%       80%       90%      100% 
+
+      1.0       1.0       2.0       3.0       4.0       8.0      13.0      24.0      86.6     753.0 2325567.0 
+
+[1] Read 3242 contigs. Removed 1494 with no reads. After filtering at count quantile =0.3, there are 1141 contigs
+
+[1] # Total low count contigs per sample =  145,111,155,120,170,67,203,86
+
+Calculating library sizes from column totals.
+
+[1] Using samples: case_X11706He_AGTTCC_L001_R1_001_trimmed.fastq_bwa.sam.bam,case_X11699He_GGCTAC_L001_R1_001_trimmed.fastq_bwa.sam.bam,case_X11698He_TAGCTT_L001_R1_001_trimmed.fastq_bwa.sam.bam,case_X11700He_CTTGTA_L001_R1_001_trimmed.fastq_bwa.sam.bam,control_X11706Liv_CAAAAG_L003_R1_001_trimmed.fastq_bwa.sam.bam,control_X11700Liv_ATTCCT_L003_R1_001_trimmed.fastq_bwa.sam.bam,control_X11698Liv_ACTGAT_L003_R1_001_trimmed.fastq_bwa.sam.bam,control_X11699Liv_ATGAGC_L003_R1_001_trimmed.fastq_bwa.sam.bam
+
+[1] Using design matrix:
+
+  (Intercept) groupcase
+
+1           1         1
+
+2           1         1
+
+3           1         1
+
+4           1         1
+
+5           1         0
+
+6           1         0
+
+7           1         0
+
+8           1         0
+
+attr(,"assign")
+
+[1] 0 1
+
+attr(,"contrasts")
+
+attr(,"contrasts")$group
+
+[1] contr.treatment
+
+[1] "No GLM fit outlier genes found\n"
+
+[1] Common Dispersion = 0.24088423096811 CV =  0.490799583300669 getPriorN =  3.33333333333333
+
+[1] "Raw sample read totals 1440960,1341813,2888924,1428365,2443751,1644652,1682104,1806045"
+
+[1] raw contig counts by sample:
+
+ case_X11706He_AGTTCC case_X11699He_GGCTAC case_X11698He_TAGCTT case_X11700He_CTTGTA control_X11706Liv_CA control_X11700Liv_AT control_X11698Liv_AC control_X11699Liv_AT
+
+ Min.   :0.0043       Min.   :0.0043       Min.   :0.0043       Min.   :0.0043       Min.   :0.0043       Min.   :0.0043       Min.   :0.0043       Min.   :0.0043      
+
+ 1st Qu.:0.0043       1st Qu.:0.0043       1st Qu.:0.0043       1st Qu.:0.0043       1st Qu.:0.3032       1st Qu.:0.0043       1st Qu.:0.3032       1st Qu.:0.0043      
+
+ Median :0.4786       Median :0.4786       Median :0.4786       Median :0.4786       Median :0.7789       Median :0.6031       Median :0.6998       Median :0.6031      
+
+ Mean   :0.9210       Mean   :0.9428       Mean   :1.0205       Mean   :0.9753       Mean   :1.0519       Mean   :1.0343       Mean   :0.9855       Mean   :0.9966      
+
+ 3rd Qu.:1.5770       3rd Qu.:1.5855       3rd Qu.:1.7161       3rd Qu.:1.6022       3rd Qu.:1.5410       3rd Qu.:1.6335       3rd Qu.:1.4473       3rd Qu.:1.5534      
+
+ Max.   :5.3609       Max.   :5.3589       Max.   :5.6967       Max.   :5.3702       Max.   :5.8209       Max.   :5.6905       Max.   :5.7999       Max.   :5.7215      
+
+ NA's   :902          NA's   :957          NA's   :821          NA's   :950          NA's   :650          NA's   :969          NA's   :664          NA's   :886         
+
+[1] normalised contig counts by sample:
+
+ case_X11706He_AGTTCC case_X11699He_GGCTAC case_X11698He_TAGCTT case_X11700He_CTTGTA control_X11706Liv_CA control_X11700Liv_AT control_X11698Liv_AC control_X11699Liv_AT
+
+ Min.   :-Inf         Min.   :-Inf         Min.   :-Inf         Min.   :-Inf         Min.   :-Inf         Min.   :-Inf         Min.   :-Inf         Min.   :-Inf        
+
+ 1st Qu.:-Inf         1st Qu.:-Inf         1st Qu.:-Inf         1st Qu.:-Inf         1st Qu.:   0         1st Qu.:-Inf         1st Qu.:   0         1st Qu.:-Inf        
+
+ Median :   0         Median :   0         Median :   0         Median :   0         Median :   0         Median :   0         Median :   0         Median :   0        
+
+ Mean   :-Inf         Mean   :-Inf         Mean   :-Inf         Mean   :-Inf         Mean   :-Inf         Mean   :-Inf         Mean   :-Inf         Mean   :-Inf        
+
+ 3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1         3rd Qu.:   1        
+
+ Max.   :   5         Max.   :   5         Max.   :   6         Max.   :   5         Max.   :   6         Max.   :   6         Max.   :   6         Max.   :   5        
+
+[1] Using ncol rawrs= 8
+
+[1] # writing output
+
+[1] # urls
+
+ 
+
+
+
+    
+         
+    
+         
+            
+                
+                    
+                         
+                 
+                $INSTALL_DIR 
+                echo "source('http://bioconductor.org/biocLite.R')" > $INSTALL_DIR/runme.R 
+                echo "installme=c('edgeR','limma','DESeq','DESeq2','gplots')" >> $INSTALL_DIR/runme.R 
+                echo "biocLite()" >> $INSTALL_DIR/runme.R 
+                echo "biocLite(installme)" >> $INSTALL_DIR/runme.R 
+                echo "quit(save='no')" >> $INSTALL_DIR/runme.R                 
+                 export PATH=$PATH && export R_HOME=$R_HOME && export R_LIBS=$R_LIBS && R CMD BATCH $INSTALL_DIR/runme.R   
+             
+         
+        Installs some basic bioc packages for the edgeR wrapper
+        It's clunky but this is the only way I could get anything installed into the package_r3 R
+        
+     
+