# HG changeset patch
# User fubar
# Date 1420529686 18000
# Node ID d6fbd1cf7768c88cc2f19286ab386b8d84277b52
# Parent  0de9466084233ca499c8579383b6ea8066661595
Uploaded
diff -r 0de946608423 -r d6fbd1cf7768 rgToolFactory.py
--- a/rgToolFactory.py	Tue Jan 06 02:20:55 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,638 +0,0 @@
-# rgToolFactory.py
-# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
-# 
-# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
-# 
-# all rights reserved
-# Licensed under the LGPL
-# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
-#
-# march 2014
-# added ghostscript and graphicsmagick as dependencies 
-# fixed a wierd problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
-# errors ensued
-#
-# august 2013
-# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
-#
-# july 2013
-# added ability to combine images and individual log files into html output
-# just make sure there's a log file foo.log and it will be output
-# together with all images named like "foo_*.pdf
-# otherwise old format for html
-#
-# January 2013
-# problem pointed out by Carlos Borroto
-# added escaping for <>$ - thought I did that ages ago...
-#
-# August 11 2012 
-# changed to use shell=False and cl as a sequence
-
-# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
-# It also serves as the wrapper for the new tool.
-# 
-# you paste and run your script
-# Only works for simple scripts that read one input from the history.
-# Optionally can write one new history dataset,
-# and optionally collect any number of outputs into links on an autogenerated HTML page.
-
-# DO NOT install on a public or important site - please.
-
-# installed generated tools are fine if the script is safe.
-# They just run normally and their user cannot do anything unusually insecure
-# but please, practice safe toolshed.
-# Read the fucking code before you install any tool 
-# especially this one
-
-# After you get the script working on some test data, you can
-# optionally generate a toolshed compatible gzip file
-# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
-# safe and largely automated installation in a production Galaxy.
-
-# If you opt for an HTML output, you get all the script outputs arranged
-# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
-# Ugly but really inexpensive.
-# 
-# Patches appreciated please. 
-#
-#
-# long route to June 2012 product
-# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
-# derived from an integrated script model  
-# called rgBaseScriptWrapper.py
-# Note to the unwary:
-#   This tool allows arbitrary scripting on your Galaxy as the Galaxy user
-#   There is nothing stopping a malicious user doing whatever they choose
-#   Extremely dangerous!!
-#   Totally insecure. So, trusted users only
-#
-# preferred model is a developer using their throw away workstation instance - ie a private site.
-# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
-#
-
-import sys 
-import shutil 
-import subprocess 
-import os 
-import time 
-import tempfile 
-import optparse
-import tarfile
-import re
-import shutil
-import math
-
-progname = os.path.split(sys.argv[0])[1] 
-myversion = 'V001.1 March 2014' 
-verbose = False 
-debug = False
-toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
-
-def timenow():
-    """return current time as a string
-    """
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-html_escape_table = {
-     "&": "&",
-     ">": ">",
-     "<": "<",
-     "$": "\$"
-     }
-
-def html_escape(text):
-     """Produce entities within text."""
-     return "".join(html_escape_table.get(c,c) for c in text)
-
-def cmd_exists(cmd):
-     return subprocess.call("type " + cmd, shell=True, 
-           stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
-
-
-class ScriptRunner:
-    """class is a wrapper for an arbitrary script
-    """
-
-    def __init__(self,opts=None,treatbashSpecial=True):
-        """
-        cleanup inputs, setup some outputs
-        
-        """
-        self.useGM = cmd_exists('gm')
-        self.useIM = cmd_exists('convert')
-        self.useGS = cmd_exists('gs')
-        self.temp_warned = False # we want only one warning if $TMP not set
-        self.treatbashSpecial = treatbashSpecial
-        if opts.output_dir: # simplify for the tool tarball
-            os.chdir(opts.output_dir)
-        self.thumbformat = 'png'
-        self.opts = opts
-        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
-        self.toolid = self.toolname
-        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
-        self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
-        self.xmlfile = '%s.xml' % self.toolname
-        s = open(self.opts.script_path,'r').readlines()
-        s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
-        self.script = '\n'.join(s)
-        fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
-        tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
-        tscript.write(self.script)
-        tscript.close()
-        self.indentedScript = '\n'.join([' %s' % x for x in s]) # for restructured text in help
-        self.escapedScript = '\n'.join([html_escape(x) for x in s])
-        self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
-        if opts.output_dir: # may not want these complexities 
-            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
-            art = '%s.%s' % (self.toolname,opts.interpreter)
-            artpath = os.path.join(self.opts.output_dir,art) # need full path
-            artifact = open(artpath,'w') # use self.sfile as script source for Popen
-            artifact.write(self.script)
-            artifact.close()
-        self.cl = []
-        self.html = []
-        a = self.cl.append
-        a(opts.interpreter)
-        if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
-            a(self.sfile)
-        else:
-            a('-') # stdin
-        a(opts.input_tab)
-        a(opts.output_tab)
-        self.outFormats = 'tabular' # TODO make this an option at tool generation time
-        self.inputFormats = 'tabular' # TODO make this an option at tool generation time
-        self.test1Input = '%s_test1_input.xls' % self.toolname
-        self.test1Output = '%s_test1_output.xls' % self.toolname
-        self.test1HTML = '%s_test1_output.html' % self.toolname
-
-    def makeXML(self):
-        """
-        Create a Galaxy xml tool wrapper for the new script as a string to write out
-        fixme - use templating or something less fugly than this example of what we produce
-
-        
-            a tabular file 
-            
-            reverse.py --script_path "$runMe" --interpreter "python" 
-            --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file" 
-             
-            
-             
-            
-             
-            
-            
-**What it Does**
-
-Reverse the columns in a tabular file
-
-             
-            
-            
-            
-# reverse order of columns in a tabular file
-import sys
-inp = sys.argv[1]
-outp = sys.argv[2]
-i = open(inp,'r')
-o = open(outp,'w')
-for row in i:
-     rs = row.rstrip().split('\t')
-     rs.reverse()
-     o.write('\t'.join(rs))
-     o.write('\n')
-i.close()
-o.close()
- 
-
-             
-             
-             
-        
-        """ 
-        newXML="""
-            %(tooldesc)s
-            %(command)s
-            
-            %(inputs)s
-             
-            
-            %(outputs)s
-             
-            
-            
-            %(script)s
-             
-             
-            %(tooltests)s
-            
-            %(help)s
-             
-             """ # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
-               
-        newCommand="""
-            %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" 
-            --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s 
-             """ # may NOT be an input or htmlout
-        tooltestsTabOnly = """
-         
-         
-         %s ' % self.opts.tool_desc
-        else:
-            xdict['tooldesc'] = ''
-        xdict['command_outputs'] = '' 
-        xdict['outputs'] = '' 
-        if self.opts.input_tab <> 'None':
-            xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something
-            xdict['inputs'] = '
  
-        """ 
-        galhtmlattr = """
""" 
-        galhtmlpostfix = """
Galaxy Tool "%s" run at %s
%s %s %s %s %s images and outputs
' % sectionname)
-                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)\n')
-                    for i,paths in enumerate(ourpdfs): 
-                        fname,thumb = paths
-                        s= """ \n'
-                            ntogo = 0
-                            if i < (npdf - 1): # more to come
-                               s += ''
-                               ntogo = nacross
-                        else:
-                            ntogo -= 1
-                        html.append(s)
-                    if html[-1].strip().endswith(' '):
-                        html.append('
  '*ntogo)
-                        html.append('\n')
-                logt = open(logfname,'r').readlines()
-                logtext = [x for x in logt if x.strip() > '']
-                html.append('%s log output
' % sectionname)
-                if len(logtext) > 1:
-                    html.append('\n\n')
-                    html += logtext
-                    html.append('\n \n')
-                else:
-                    html.append('%s is emptyOutput File Name (click to view) Size 
All output files available for downloading
\n')
-           html += fhtml # add all non-pdf files to the end of the display
-        else:
-            html.append('### Error - %s returned no files - please confirm that parameters are sane
' % self.opts.interpreter)
-        html.append(galhtmlpostfix)
-        htmlf = file(self.opts.output_html,'w')
-        htmlf.write('\n'.join(html))
-        htmlf.write('\n')
-        htmlf.close()
-        self.html = html
-
-
-    def run(self):
-        """
-        scripts must be small enough not to fill the pipe!
-        """
-        if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
-          retval = self.runBash()
-        else:
-            if self.opts.output_dir:
-                ste = open(self.elog,'w')
-                sto = open(self.tlog,'w')
-                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
-                sto.flush()
-                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
-            else:
-                p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
-            p.stdin.write(self.script)
-            p.stdin.close()
-            retval = p.wait()
-            if self.opts.output_dir:
-                sto.close()
-                ste.close()
-                err = open(self.elog,'r').read()
-                if retval <> 0 and err: # problem
-                    print >> sys.stderr,err
-            if self.opts.make_HTML:
-                self.makeHtml()
-        return retval
-
-    def runBash(self):
-        """
-        cannot use - for bash so use self.sfile
-        """
-        if self.opts.output_dir:
-            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
-            sto = open(self.tlog,'w')
-            sto.write(s)
-            sto.flush()
-            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
-        else:
-            p = subprocess.Popen(self.cl,shell=False)            
-        retval = p.wait()
-        if self.opts.output_dir:
-            sto.close()
-        if self.opts.make_HTML:
-            self.makeHtml()
-        return retval
-  
-
-def main():
-    u = """
-    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
-    rgToolFactory.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
-     
-    """
-    op = optparse.OptionParser()
-    a = op.add_option
-    a('--script_path',default=None)
-    a('--tool_name',default=None)
-    a('--interpreter',default=None)
-    a('--output_dir',default=None)
-    a('--output_html',default=None)
-    a('--input_tab',default="None")
-    a('--output_tab',default="None")
-    a('--user_email',default='Unknown')
-    a('--bad_user',default=None)
-    a('--make_Tool',default=None)
-    a('--make_HTML',default=None)
-    a('--help_text',default=None)
-    a('--tool_desc',default=None)
-    a('--new_tool',default=None)
-    a('--tool_version',default=None)
-    opts, args = op.parse_args()
-    assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user)
-    assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq'
-    assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript'
-    assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
-    if opts.output_dir:
-        try:
-            os.makedirs(opts.output_dir)
-        except:
-            pass
-    r = ScriptRunner(opts)
-    if opts.make_Tool:
-        retcode = r.makeTooltar()
-    else:
-        retcode = r.run()
-    os.unlink(r.sfile)
-    if retcode:
-        sys.exit(retcode) # indicate failure to job runner
-
-
-if __name__ == "__main__":
-    main()
-
-
diff -r 0de946608423 -r d6fbd1cf7768 rgedgeRpaired.xml.camera
--- a/rgedgeRpaired.xml.camera	Tue Jan 06 02:20:55 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1084 +0,0 @@
-
-  models using BioConductor packages 
-  
-      biocbasics 
-      r302 
-      graphicsmagick 
-      ghostscript 
-   
-  
-  
-     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" 
-    --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
-   
-  
-    
-         
-    
-    
-         
-    
-    
-        Do not run edgeR 
-          Run edgeR 
-         
-         
-           
-          
-    
-    Do not run DESeq2 
-      Run DESeq2 
-     
-     
-         Parametric (default) fit for dispersions 
-            Local fit - this will automagically be used if parametric fit fails 
-            Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation 
-          
-      
-       
-     
-    Do not run VOOM 
-      Run VOOM 
-     
-    
-    fdr 
-            Benjamini Hochberg 
-            Benjamini Yukateli 
-            Bonferroni 
-            Hochberg 
-            Holm 
-            Hommel 
-            no control for multiple tests 
-    
-   
-  
-    
-         edgeR['doedgeR'] == "T" 
-     
-    
-         DESeq2['doDESeq2'] == "T" 
-     
-    
-         doVoom == "T" 
-     
-     
- 
-      
- 
-
- 
- 
-
-
-
- nsamp) {
-      dm =dm[1:nsamp,]
-      #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
-    }
-    newcolnames = substr(colnames(dm),1,20)
-    colnames(dm) = newcolnames
-    pdf(outpdfname)
-    heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
-         Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
-    dev.off()
-}
-
-hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
-{
-    # for 2 groups only was
-    #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
-    #pcols = unlist(lapply(group,col.map))
-    gu = unique(group)
-    colours = rainbow(length(gu),start=0.3,end=0.6)
-    pcols = colours[match(group,gu)]
-    nrows = nrow(cmat)
-    mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
-    if (nrows > nsamp)  {
-               cmat = cmat[c(1:nsamp),]
-               mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
-          }
-    newcolnames = substr(colnames(cmat),1,20)
-    colnames(cmat) = newcolnames
-    pdf(outpdfname)
-    heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
-    dev.off()
-}
-
-qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
-# stolen from https://gist.github.com/703512
-{
-    o = -log10(sort(pvector,decreasing=F))
-    e = -log10( 1:length(o)/length(o) )
-    o[o==-Inf] = reallysmall
-    o[o==Inf] = reallybig
-    maint = descr
-    pdf(outpdf)
-    plot(e,o,pch=19,cex=1, main=maint, ...,
-        xlab=expression(Expected~~-log[10](italic(p))),
-        ylab=expression(Observed~~-log[10](italic(p))),
-        xlim=c(0,max(e)), ylim=c(0,max(o)))
-    lines(e,e,col="red")
-    grid(col = "lightgray", lty = "dotted")
-    dev.off()
-}
-
-smearPlot = function(DGEList,deTags, outSmear, outMain)
-        {
-        pdf(outSmear)
-        plotSmear(DGEList,de.tags=deTags,main=outMain)
-        grid(col="lightgray", lty="dotted")
-        dev.off()
-        }
-        
-boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
-{   # 
-        nc = ncol(rawrs)
-        for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
-        fullnames = colnames(rawrs)
-        newcolnames = substr(colnames(rawrs),1,20)
-        colnames(rawrs) = newcolnames
-        newcolnames = substr(colnames(cleanrs),1,20)
-        colnames(cleanrs) = newcolnames
-        defpar = par(no.readonly=T)
-        print.noquote('raw contig counts by sample:')
-        print.noquote(summary(rawrs))
-        print.noquote('normalised contig counts by sample:')
-        print.noquote(summary(cleanrs))
-        pdf(pdfname)
-        par(mfrow=c(1,2))
-        boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
-        grid(col="lightgray",lty="dotted")
-        boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
-        grid(col="lightgray",lty="dotted")
-        dev.off()
-        pdfname = "sample_counts_histogram.pdf" 
-        nc = ncol(rawrs)
-        print.noquote(paste('Using ncol rawrs=',nc))
-        ncroot = round(sqrt(nc))
-        if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
-        m = c()
-        for (i in c(1:nc)) {
-              rhist = hist(rawrs[,i],breaks=100,plot=F)
-              m = append(m,max(rhist\$counts))
-             }
-        ymax = max(m)
-        ncols = length(fullnames)
-        if (ncols > 20) 
-        {
-           scale = 7*ncols/20
-           pdf(pdfname,width=scale,height=scale)
-        } else { 
-           pdf(pdfname)
-        }
-        par(mfrow=c(ncroot,ncroot))
-        for (i in c(1:nc)) {
-                 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", 
-                 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
-             }
-        dev.off()
-        par(defpar)
-      
-}
-
-cumPlot = function(rawrs,cleanrs,maint,myTitle)
-{   # updated to use ecdf
-        pdfname = "Filtering_rowsum_bar_charts.pdf"
-        defpar = par(no.readonly=T)
-        lrs = log(rawrs,10) 
-        lim = max(lrs)
-        pdf(pdfname)
-        par(mfrow=c(2,1))
-        hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
-             ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
-        grid(col="lightgray", lty="dotted")
-        lrs = log(cleanrs,10) 
-        hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
-             ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
-        grid(col="lightgray", lty="dotted")
-        dev.off()
-        par(defpar)
-}
-
-cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
-{   # updated to use ecdf
-        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
-        pdf(pdfname)
-        par(mfrow=c(2,1))
-        lastx = max(rawrs)
-        rawe = knots(ecdf(rawrs))
-        cleane = knots(ecdf(cleanrs))
-        cy = 1:length(cleane)/length(cleane)
-        ry = 1:length(rawe)/length(rawe)
-        plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
-             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
-        grid(col="blue")
-        plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
-             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
-        grid(col="blue")
-        dev.off()
-}
-
-
-
-doGSEAold = function(y=NULL,design=NULL,histgmt="",
-                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
-{
-  sink('Camera.log')
-  genesets = c()
-  if (bigmt > "")
-  {
-    bigenesets = readLines(bigmt)
-    genesets = bigenesets
-  }
-  if (histgmt > "")
-  {
-    hgenesets = readLines(histgmt)
-    if (bigmt > "") {
-      genesets = rbind(genesets,hgenesets)
-    } else {
-      genesets = hgenesets
-    } # use only history if no bi
-  }
-  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
-  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
-  outf = outfname
-  head=paste(myTitle,'edgeR GSEA')
-  write(head,file=outfname,append=F)
-  ntest=length(genesets)
-  urownames = toupper(rownames(y))
-  upcam = c()
-  downcam = c()
-  for (i in 1:ntest) {
-    gs = unlist(genesets[i])
-    g = gs[1] # geneset_id
-    u = gs[2]
-    if (u > "") { u = paste("",u," ",sep="") }
-    glist = gs[3:length(gs)] # member gene symbols
-    glist = toupper(glist)
-    inglist = urownames %in% glist
-    nin = sum(inglist)
-    if ((nin > minnin) && (nin < maxnin)) {
-      ### print(paste('@@found',sum(inglist),'genes in glist'))
-      camres = camera(y=y,index=inglist,design=design)
-      if (! is.null(camres)) {
-      rownames(camres) = g # gene set name
-      camres = cbind(GeneSet=g,URL=u,camres)
-      if (camres\$Direction == "Up") 
-        {
-        upcam = rbind(upcam,camres) } else {
-          downcam = rbind(downcam,camres)
-        }
-      }
-   }
-  }
-  uscam = upcam[order(upcam\$PValue),]
-  unadjp = uscam\$PValue
-  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
-  dscam = downcam[order(downcam\$PValue),]
-  unadjp = dscam\$PValue
-  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
-  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
-  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
-  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
-  sink()
-}
-
-
-
-
-doGSEA = function(y=NULL,design=NULL,histgmt="",
-                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
-{
-  sink('Camera.log')
-  genesets = c()
-  if (bigmt > "")
-  {
-    bigenesets = readLines(bigmt)
-    genesets = bigenesets
-  }
-  if (histgmt > "")
-  {
-    hgenesets = readLines(histgmt)
-    if (bigmt > "") {
-      genesets = rbind(genesets,hgenesets)
-    } else {
-      genesets = hgenesets
-    } # use only history if no bi
-  }
-  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
-  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
-  outf = outfname
-  head=paste(myTitle,'edgeR GSEA')
-  write(head,file=outfname,append=F)
-  ntest=length(genesets)
-  urownames = toupper(rownames(y))
-  upcam = c()
-  downcam = c()
-  incam = c()
-  urls = c()
-  gsids = c()
-  for (i in 1:ntest) {
-    gs = unlist(genesets[i])
-    gsid = gs[1] # geneset_id
-    url = gs[2]
-    if (url > "") { url = paste("",url," ",sep="") }
-    glist = gs[3:length(gs)] # member gene symbols
-    glist = toupper(glist)
-    inglist = urownames %in% glist
-    nin = sum(inglist)
-    if ((nin > minnin) && (nin < maxnin)) {
-      incam = c(incam,inglist)
-      gsids = c(gsids,gsid)
-      urls = c(urls,url)
-    }
-  }
-  incam = as.list(incam)
-  names(incam) = gsids
-  allcam = camera(y=y,index=incam,design=design)
-  allcamres = cbind(geneset=gsids,allcam,URL=urls)
-  for (i in 1:ntest) {
-    camres = allcamres[i]
-    res = try(test = (camres\$Direction == "Up"))
-    if ("try-error" %in% class(res)) {
-      cat("test failed, camres = :")
-      print.noquote(camres)
-    } else  { if (camres\$Direction == "Up")
-    {  upcam = rbind(upcam,camres)
-    } else { downcam = rbind(downcam,camres)
-    }
-              
-    }
-  }
-  uscam = upcam[order(upcam\$PValue),]
-  unadjp = uscam\$PValue
-  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
-  dscam = downcam[order(downcam\$PValue),]
-  unadjp = dscam\$PValue
-  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
-  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
-  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
-  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
-  sink()
-  }
- 
-
-edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, 
-        fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
-        filterquantile=0.2, subjects=c(),mydesign=NULL,
-        doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
-        histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-        doCook=F,DESeq_fitType="parameteric") 
-{
-  # Error handling
-  if (length(unique(group))!=2){
-    print("Number of conditions identified in experiment does not equal 2")
-    q()
-    }
-  require(edgeR)
-  options(width = 512) 
-  mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
-  allN = nrow(Count_Matrix)
-  nscut = round(ncol(Count_Matrix)/2)
-  colTotmillionreads = colSums(Count_Matrix)/1e6
-  counts.dataframe = as.data.frame(c()) 
-  rawrs = rowSums(Count_Matrix)
-  nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
-  nzN = nrow(nonzerod)
-  nzrs = rowSums(nonzerod)
-  zN = allN - nzN
-  print('# Quantiles for non-zero row counts:',quote=F)
-  print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
-  if (useNDF == T)
-  {
-    gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
-    lo = colSums(Count_Matrix[!gt1rpin3,])
-    workCM = Count_Matrix[gt1rpin3,]
-    cleanrs = rowSums(workCM)
-    cleanN = length(cleanrs)
-    meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
-    print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
-    maint = paste('Filter >=1/million reads in >=',nscut,'samples')
-  }   else {        
-    useme = (nzrs > quantile(nzrs,filterquantile))
-    workCM = nonzerod[useme,]
-    lo = colSums(nonzerod[!useme,])
-    cleanrs = rowSums(workCM)
-    cleanN = length(cleanrs)
-    meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
-    print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
-    maint = paste('Filter below',filterquantile,'quantile')
-  }
-  cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
-  allgenes = rownames(workCM)
-  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
-  genecards=" 0.8) # is ucsc style string
-  {
-    print("@@ using ucsc substitution for urls")
-    contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes," ")
-  } else {
-    print("@@ using genecards substitution for urls")
-    contigurls = paste0(genecards,allgenes,"\'>",allgenes,"")
-  }
-  print.noquote("# urls")
-  print.noquote(head(contigurls))
-  print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) 
-  cmrowsums = rowSums(workCM)
-  TName=unique(group)[1]
-  CName=unique(group)[2]
-  if (is.null(mydesign)) {
-    if (length(subjects) == 0) 
-    {
-      mydesign = model.matrix(~group)
-    } 
-    else { 
-      subjf = factor(subjects)
-      mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
-    }
-  } 
-  print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
-  print.noquote('Using design matrix:')
-  print.noquote(mydesign)
-  if (doedgeR) {
-  sink('edgeR.log')
-  #### Setup DGEList object
-  DGEList = DGEList(counts=workCM, group = group)
-  DGEList = calcNormFactors(DGEList)
-
-  DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-  comdisp = DGEList\$common.dispersion
-  DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-  if (edgeR_priordf > 0) {
-    print.noquote(paste("prior.df =",edgeR_priordf))
-    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf)
-  } else {
-    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-  }
-  DGLM = glmFit(DGEList,design=mydesign)
-  DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
-  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-  normData = (1e+06*DGEList\$counts/efflib)
-  uoutput = cbind( 
-    Name=as.character(rownames(DGEList\$counts)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
-    DGEList\$counts
-  )
-  soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
-  goodness = gof(DGLM, pcutoff=fdrthresh)
-  if (sum(goodness\$outlier) > 0) {
-    print.noquote('GLM outliers:')
-    print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
-    } else { 
-      print('No GLM fit outlier genes found\n')
-    }
-  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
-  pdf("edgeR_GoodnessofFit.pdf")
-  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
-  abline(0,1,lwd=3)
-  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
-  dev.off()
-  estpriorn = getPriorN(DGEList)
-  print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
-  efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors
-  normData = (1e+06*DGEList\$counts/efflib)
-  uniqueg = unique(group)
-  #### Plot MDS
-  sample_colors =  match(group,levels(group))
-  sampleTypes = levels(factor(group))
-  print.noquote(sampleTypes)
-  pdf("edgeR_MDSplot.pdf")
-  plotMDS.DGEList(DGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
-  legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
-  grid(col="blue")
-  dev.off()
-  colnames(normData) = paste( colnames(normData),'N',sep="_")
-  print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
-  nzd = data.frame(log(nonzerod + 1e-2,10))
-  try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") )
-  write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
-  tt = cbind( 
-    Name=as.character(rownames(DGEList\$counts)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
-  )
-  print.noquote("# edgeR Top tags\n")
-  tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
-  tt = tt[order(DE\$table\$PValue),] 
-  print.noquote(tt[1:50,])
-  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
-  nsig = length(deTags)
-  print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
-  deColours = ifelse(deTags,'red','black')
-  pdf("edgeR_BCV_vs_abundance.pdf")
-  plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
-  dev.off()
-  dg = DGEList[order(DE\$table\$PValue),]
-  #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
-  efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors
-  normData = (1e+06*dg\$counts/efflib)
-  outpdfname="edgeR_top_100_heatmap.pdf"
-  hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle))
-  outSmear = "edgeR_smearplot.pdf"
-  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
-  smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
-  qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf')
-  norm.factor = DGEList\$samples\$norm.factors
-  topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
-  edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
-  edgeRcounts = rep(0, length(allgenes))
-  edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
-  sink()
-  } ### doedgeR
-  if (doDESeq2 == T)
-  {
-    sink("DESeq2.log")
-    # DESeq2
-    require('DESeq2')
-    library('RColorBrewer')
-    if (length(subjects) == 0)
-        {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
-        } else {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
-        }
-    #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
-    #rDESeq = results(DESeq2)
-    #newCountDataSet(workCM, group)
-    deSeqDatsizefac = estimateSizeFactors(deSEQds)
-    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
-    resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
-    rDESeq = as.data.frame(results(resDESeq))
-    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
-    srDESeq = rDESeq[order(rDESeq\$pvalue),]
-    qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf')
-    cat("# DESeq top 50\n")
-    print.noquote(srDESeq[1:50,])
-    write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
-    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
-    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
-    DESeqcounts = rep(0, length(allgenes))
-    DESeqcounts[DESeqcountsindex] = 1
-    pdf("DESeq2_dispersion_estimates.pdf")
-    plotDispEsts(resDESeq)
-    dev.off()
-    ysmall = abs(min(rDESeq\$log2FoldChange))
-    ybig = abs(max(rDESeq\$log2FoldChange))
-    ylimit = min(4,ysmall,ybig)
-    pdf("DESeq2_MA_plot.pdf")
-    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
-    dev.off()
-    rlogres = rlogTransformation(resDESeq)
-    sampledists = dist( t( assay(rlogres) ) )
-    sdmat = as.matrix(sampledists)
-    pdf("DESeq2_sample_distance_plot.pdf")
-    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
-         col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
-    dev.off()
-    ###outpdfname="DESeq2_top50_heatmap.pdf"
-    ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
-    sink()
-    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
-    if ("try-error" %in% class(result)) {
-         print.noquote('DESeq2 plotPCA failed.')
-         } else {
-         pdf("DESeq2_PCA_plot.pdf")
-         #### wtf - print? Seems needed to get this to work
-         print(ppca)
-         dev.off()
-        }
-  }
-
-  if (doVoom == T) {
-      sink('VOOM.log')
-      if (doedgeR == F) {
-         #### Setup DGEList object
-         DGEList = DGEList(counts=workCM, group = group)
-         DGEList = calcNormFactors(DGEList)
-         DGEList = estimateGLMCommonDisp(DGEList,mydesign)
-         DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
-         DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-         DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
-         norm.factor = DGEList\$samples\$norm.factors
-         }
-      pdf("VOOM_mean_variance_plot.pdf")
-      dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor)
-      dev.off()
-      # Use limma to fit data
-      fit = lmFit(dat.voomed, mydesign)
-      fit = eBayes(fit)
-      rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
-      qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf')
-      rownames(rvoom) = rownames(workCM)
-      rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
-      srvoom = rvoom[order(rvoom\$P.Value),]
-      cat("# VOOM top 50\n")
-      print(srvoom[1:50,])
-      write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F)
-      # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
-      topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
-      voomcountsindex = which(allgenes %in% topresults.voom\$ID)
-      voomcounts = rep(0, length(allgenes))
-      voomcounts[voomcountsindex] = 1
-      sink()
-  }
-
-  if (doCamera) {
-  doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
-    outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
-  }
-
-  if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
-    if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
-        vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
-        counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, 
-                                       VOOM_limma = voomcounts, row.names = allgenes)
-       } else if ((doDESeq2==T) && (doedgeR==T))  {
-         vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
-         counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
-       } else if ((doVoom==T) && (doedgeR==T)) {
-        vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
-        counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
-       }
-    
-    if (nrow(counts.dataframe > 1)) {
-      counts.venn = vennCounts(counts.dataframe)
-      vennf = "Venn_significant_genes_overlap.pdf" 
-      pdf(vennf)
-      vennDiagram(counts.venn,main=vennmain,col="maroon")
-      dev.off()
-    }
-  } #### doDESeq2 or doVoom
-  
-}
-#### Done
-
-###sink(stdout(),append=T,type="message")
-builtin_gmt = ""
-history_gmt = ""
-history_gmt_name = ""
-out_edgeR = F
-out_DESeq2 = F
-out_VOOM = "$out_VOOM"
-doDESeq2 = $DESeq2.doDESeq2 # make these T or F
-doVoom = $doVoom
-doCamera = F
-doedgeR = $edgeR.doedgeR
-edgeR_priordf = 0
-
-
-#if $doVoom == "T":
-  out_VOOM = "$out_VOOM"
-#end if
-
-#if $DESeq2.doDESeq2 == "T":
-  out_DESeq2 = "$out_DESeq2"
-  DESeq_fitType = "$DESeq2.DESeq_fitType"
-#end if
-
-#if $edgeR.doedgeR == "T":
-  out_edgeR = "$out_edgeR"
-  edgeR_priordf = $edgeR.edgeR_priordf  
-#end if
-
-
-
-if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
-{
-write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
-quit(save="no",status=2)
-}
-
-Out_Dir = "$html_file.files_path"
-Input =  "$input1"
-TreatmentName = "$treatment_name"
-TreatmentCols = "$Treat_cols"
-ControlName = "$control_name"
-ControlCols= "$Control_cols"
-org = "$input1.dbkey"
-if (org == "") { org = "hg19"}
-fdrtype = "$fdrtype"
-fdrthresh = $fdrthresh
-useNDF = $useNDF
-fQ = $fQ # non-differential centile cutoff
-myTitle = "$title"
-sids = strsplit("$subjectids",',')
-subjects = unlist(sids)
-nsubj = length(subjects)
-TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
-CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 
-cat('Got TCols=')
-cat(TCols)
-cat('; CCols=')
-cat(CCols)
-cat('\n')
-useCols = c(TCols,CCols)
-if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
-Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
-snames = colnames(Count_Matrix)
-nsamples = length(snames)
-if (nsubj >  0 & nsubj != nsamples) {
-options("show.error.messages"=T)
-mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
-   'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
-write(mess, stderr())
-quit(save="no",status=4)
-}
-if (length(subjects) != 0) {subjects = subjects[useCols]}
-Count_Matrix = Count_Matrix[,useCols] ### reorder columns
-rn = rownames(Count_Matrix)
-islib = rn %in% c('librarySize','NotInBedRegions')
-LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
-Count_Matrix = Count_Matrix[subset(rn,! islib),]
-group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             #Build a group descriptor
-group = factor(group, levels=c(ControlName,TreatmentName))
-colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")                   #Relable columns
-results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
-                 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
-                 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
-                 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
-                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
-sessionInfo()
-]]>
- 
- 
-
-
-**What it does**
-
-Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
-Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
-
-**Input**
-
-Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
-and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the 
-non-negative integer count of reads from one sample overlapping the feature. 
-The matrix must have a header row uniquely identifying the source samples, and unique row names in 
-the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
-
-**Specifying comparisons**
-
-This is basically dumbed down for two factors - case vs control.
-
-More complex interfaces are possible but painful at present. 
-Probably need to specify a phenotype file to do this better.
-Work in progress. Send code.
-
-If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
-put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or 
-A list of integers, one for each subject or an empty string if samples are all independent.
-If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
-Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
-
-So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
-eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use
-8,9,1,1,2,2 
-as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
-
-**Methods available**
-
-You can run 3 popular Bioconductor packages available for count data.
-
-edgeR - see edgeR_ for details
-
-VOOM/limma - see limma_VOOM_ for details
-
-DESeq2 - see DESeq2_ for details
-
-and optionally camera in edgeR which works better if MSigDB is installed.
-
-**Outputs**
-
-Some helpful plots and analysis results. Note that most of these are produced using R code 
-suggested by the excellent documentation and vignettes for the Bioconductor
-packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
-
-**Note on Voom**
-
-The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
-
-This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
-
-voom is an acronym for mean-variance modelling at the observational level.
-The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
-Count data almost show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
-This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
-The weights are then used in the linear modelling process to adjust for heteroscedasticity.
-
-In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
-The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
-The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
-Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
-Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
-This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
-
-
-Author(s)
-
-Charity Law and Gordon Smyth
-
-References
-
-Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
-
-Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
-Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Reseach, Melbourne, Australia.
-http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
-
-See Also
-
-A voom case study is given in the edgeR User's Guide.
-
-vooma is a similar function but for microarrays instead of RNA-seq.
-
-
-***old rant on changes to Bioconductor package variable names between versions***
-
-The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) 
-breaking this and all other code that assumed the old name for this variable, 
-between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). 
-This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing 
-to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
-when their old scripts break. This tool currently now works with 2.4.6.
-
-**Note on prior.N**
-
-http://seqanswers.com/forums/showthread.php?t=5591 says:
-
-*prior.n*
-
-The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. 
-You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood 
-in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your 
-tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the 
-common likelihood the weight of one observation.
-
-In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, 
-or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that 
-you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation 
-(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? 
-What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. 
-If you have more samples, then the tagwise dispersion estimates will be more reliable, 
-so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. 
-
-
-From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
-
-Dear Dorota,
-
-The important settings are prior.df and trend.
-
-prior.n and prior.df are related through prior.df = prior.n * residual.df,
-and your experiment has residual.df = 36 - 12 = 24.  So the old setting of
-prior.n=10 is equivalent for your data to prior.df = 240, a very large
-value.  Going the other way, the new setting of prior.df=10 is equivalent
-to prior.n=10/24.
-
-To recover old results with the current software you would use
-
-  estimateTagwiseDisp(object, prior.df=240, trend="none")
-
-To get the new default from old software you would use
-
-  estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
-
-Actually the old trend method is equivalent to trend="loess" in the new
-software. You should use plotBCV(object) to see whether a trend is
-required.
-
-Note you could also use
-
-  prior.n = getPriorN(object, prior.df=10)
-
-to map between prior.df and prior.n.
-
-----
-
-**Attributions**
-
-edgeR - edgeR_ 
-
-VOOM/limma - limma_VOOM_ 
-
-DESeq2 - DESeq2_ for details
-
-See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
-
-Galaxy_ (that's what you are using right now!) for gluing everything together 
-
-Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
-licensed to you under the LGPL_ like other rgenetics artefacts
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
-.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
-.. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
-.. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
-.. _Galaxy: http://getgalaxy.org
- 
-
- 
-
-
diff -r 0de946608423 -r d6fbd1cf7768 rgedgeRpaired_nocamera.xml
--- a/rgedgeRpaired_nocamera.xml	Tue Jan 06 02:20:55 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1069 +0,0 @@
-
-  models using BioConductor packages 
-  
-      R 
-      graphicsmagick 
-      ghostscript  
-      biocbasics 
-   
-  
-  
-     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts" 
-    --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
-   
-  
-    
-         
-    
-    
-         
-    
-    
-        Do not run edgeR 
-          Run edgeR 
-         
-         
-          Use ordinary deviance estimates 
-             Use robust deviance estimates 
-             use Anscombe robust deviance estimates 
-          
-          
-          
-    
-    Do not run DESeq2 
-      Run DESeq2 
-     
-     
-         Parametric (default) fit for dispersions 
-            Local fit - this will automagically be used if parametric fit fails 
-            Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation 
-          
-      
-       
-     
-    Do not run VOOM 
-      Run VOOM 
-     
-    fdr 
-            Benjamini Hochberg 
-            Benjamini Yukateli 
-            Bonferroni 
-            Hochberg 
-            Holm 
-            Hommel 
-            no control for multiple tests 
-    
-   
-  
-    
-         edgeR['doedgeR'] == "T" 
-     
-    
-         DESeq2['doDESeq2'] == "T" 
-     
-    
-         doVoom == "T" 
-     
-     
- 
-      
- 
-
- 
- 
-
-
-
- nsamp) {
-      dm =dm[1:nsamp,]
-      #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total')
-    }
-    newcolnames = substr(colnames(dm),1,20)
-    colnames(dm) = newcolnames
-    pdf(outpdfname)
-    heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
-         Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
-    dev.off()
-}
-
-hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
-{
-    # for 2 groups only was
-    #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
-    #pcols = unlist(lapply(group,col.map))
-    gu = unique(group)
-    colours = rainbow(length(gu),start=0.3,end=0.6)
-    pcols = colours[match(group,gu)]
-    nrows = nrow(cmat)
-    mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
-    if (nrows > nsamp)  {
-               cmat = cmat[c(1:nsamp),]
-               mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
-          }
-    newcolnames = substr(colnames(cmat),1,20)
-    colnames(cmat) = newcolnames
-    pdf(outpdfname)
-    heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
-    dev.off()
-}
-
-qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...)
-# stolen from https://gist.github.com/703512
-{
-    o = -log10(sort(pvector,decreasing=F))
-    e = -log10( 1:length(o)/length(o) )
-    o[o==-Inf] = reallysmall
-    o[o==Inf] = reallybig
-    maint = descr
-    pdf(outpdf)
-    plot(e,o,pch=19,cex=1, main=maint, ...,
-        xlab=expression(Expected~~-log[10](italic(p))),
-        ylab=expression(Observed~~-log[10](italic(p))),
-        xlim=c(0,max(e)), ylim=c(0,max(o)))
-    lines(e,e,col="red")
-    grid(col = "lightgray", lty = "dotted")
-    dev.off()
-}
-
-smearPlot = function(myDGEList,deTags, outSmear, outMain)
-        {
-        pdf(outSmear)
-        plotSmear(myDGEList,de.tags=deTags,main=outMain)
-        grid(col="lightgray", lty="dotted")
-        dev.off()
-        }
-        
-boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
-{    
-        nc = ncol(rawrs)
-        ##### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
-        fullnames = colnames(rawrs)
-        newcolnames = substr(colnames(rawrs),1,20)
-        colnames(rawrs) = newcolnames
-        newcolnames = substr(colnames(cleanrs),1,20)
-        colnames(cleanrs) = newcolnames
-        defpar = par(no.readonly=T)
-        print.noquote('@@@ Raw contig counts by sample:')
-        print.noquote(summary(rawrs))
-        print.noquote('@@@ Library size contig counts by sample:')
-        print.noquote(summary(cleanrs))
-        pdf(pdfname)
-        par(mfrow=c(1,2))
-        boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main='log2 raw counts')
-        grid(col="lightgray",lty="dotted")
-        boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('log2 counts after ',maint))
-        grid(col="lightgray",lty="dotted")
-        dev.off()
-        pdfname = "sample_counts_histogram.pdf" 
-        nc = ncol(rawrs)
-        print.noquote(paste('Using ncol rawrs=',nc))
-        ncroot = round(sqrt(nc))
-        if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
-        m = c()
-        for (i in c(1:nc)) {
-              rhist = hist(rawrs[,i],breaks=100,plot=F)
-              m = append(m,max(rhist\$counts))
-             }
-        ymax = max(m)
-        ncols = length(fullnames)
-        if (ncols > 20) 
-        {
-           scale = 7*ncols/20
-           pdf(pdfname,width=scale,height=scale)
-        } else { 
-           pdf(pdfname)
-        }
-        par(mfrow=c(ncroot,ncroot))
-        for (i in c(1:nc)) {
-                 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", 
-                 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
-             }
-        dev.off()
-        par(defpar)
-      
-}
-
-cumPlot = function(rawrs,cleanrs,maint,myTitle)
-{   # updated to use ecdf
-        pdfname = "Differential_rowsum_bar_charts.pdf"
-        defpar = par(no.readonly=T)
-        lrs = log(rawrs,10) 
-        lim = max(lrs)
-        pdf(pdfname)
-        par(mfrow=c(2,1))
-        hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)",
-             ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
-        grid(col="lightgray", lty="dotted")
-        lrs = log(cleanrs,10) 
-        hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)",
-             ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
-        grid(col="lightgray", lty="dotted")
-        dev.off()
-        par(defpar)
-}
-
-cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
-{   # updated to use ecdf
-        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
-        pdf(pdfname)
-        par(mfrow=c(2,1))
-        lastx = max(rawrs)
-        rawe = knots(ecdf(rawrs))
-        cleane = knots(ecdf(cleanrs))
-        cy = 1:length(cleane)/length(cleane)
-        ry = 1:length(rawe)/length(rawe)
-        plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
-             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
-        grid(col="blue")
-        plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
-             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
-        grid(col="blue")
-        dev.off()
-}
-
-
-
-doGSEAold = function(y=NULL,design=NULL,histgmt="",
-                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
-{
-  sink('Camera.log')
-  genesets = c()
-  if (bigmt > "")
-  {
-    bigenesets = readLines(bigmt)
-    genesets = bigenesets
-  }
-  if (histgmt > "")
-  {
-    hgenesets = readLines(histgmt)
-    if (bigmt > "") {
-      genesets = rbind(genesets,hgenesets)
-    } else {
-      genesets = hgenesets
-    } # use only history if no bi
-  }
-  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
-  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
-  outf = outfname
-  head=paste(myTitle,'edgeR GSEA')
-  write(head,file=outfname,append=F)
-  ntest=length(genesets)
-  urownames = toupper(rownames(y))
-  upcam = c()
-  downcam = c()
-  for (i in 1:ntest) {
-    gs = unlist(genesets[i])
-    g = gs[1] # geneset_id
-    u = gs[2]
-    if (u > "") { u = paste("",u," ",sep="") }
-    glist = gs[3:length(gs)] # member gene symbols
-    glist = toupper(glist)
-    inglist = urownames %in% glist
-    nin = sum(inglist)
-    if ((nin > minnin) && (nin < maxnin)) {
-      ### print(paste('@@found',sum(inglist),'genes in glist'))
-      camres = camera(y=y,index=inglist,design=design)
-      if (! is.null(camres)) {
-      rownames(camres) = g # gene set name
-      camres = cbind(GeneSet=g,URL=u,camres)
-      if (camres\$Direction == "Up") 
-        {
-        upcam = rbind(upcam,camres) } else {
-          downcam = rbind(downcam,camres)
-        }
-      }
-   }
-  }
-  uscam = upcam[order(upcam\$PValue),]
-  unadjp = uscam\$PValue
-  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
-  dscam = downcam[order(downcam\$PValue),]
-  unadjp = dscam\$PValue
-  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
-  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
-  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
-  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
-  sink()
-}
-
-
-
-
-doGSEA = function(y=NULL,design=NULL,histgmt="",
-                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
-{
-  sink('Camera.log')
-  genesets = c()
-  if (bigmt > "")
-  {
-    bigenesets = readLines(bigmt)
-    genesets = bigenesets
-  }
-  if (histgmt > "")
-  {
-    hgenesets = readLines(histgmt)
-    if (bigmt > "") {
-      genesets = rbind(genesets,hgenesets)
-    } else {
-      genesets = hgenesets
-    } # use only history if no bi
-  }
-  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
-  genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
-  outf = outfname
-  head=paste(myTitle,'edgeR GSEA')
-  write(head,file=outfname,append=F)
-  ntest=length(genesets)
-  urownames = toupper(rownames(y))
-  upcam = c()
-  downcam = c()
-  incam = c()
-  urls = c()
-  gsids = c()
-  for (i in 1:ntest) {
-    gs = unlist(genesets[i])
-    gsid = gs[1] # geneset_id
-    url = gs[2]
-    if (url > "") { url = paste("",url," ",sep="") }
-    glist = gs[3:length(gs)] # member gene symbols
-    glist = toupper(glist)
-    inglist = urownames %in% glist
-    nin = sum(inglist)
-    if ((nin > minnin) && (nin < maxnin)) {
-      incam = c(incam,inglist)
-      gsids = c(gsids,gsid)
-      urls = c(urls,url)
-    }
-  }
-  incam = as.list(incam)
-  names(incam) = gsids
-  allcam = camera(y=y,index=incam,design=design)
-  allcamres = cbind(geneset=gsids,allcam,URL=urls)
-  for (i in 1:ntest) {
-    camres = allcamres[i]
-    res = try(test = (camres\$Direction == "Up"))
-    if ("try-error" %in% class(res)) {
-      cat("test failed, camres = :")
-      print.noquote(camres)
-    } else  { if (camres\$Direction == "Up")
-    {  upcam = rbind(upcam,camres)
-    } else { downcam = rbind(downcam,camres)
-    }
-              
-    }
-  }
-  uscam = upcam[order(upcam\$PValue),]
-  unadjp = uscam\$PValue
-  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
-  dscam = downcam[order(downcam\$PValue),]
-  unadjp = dscam\$PValue
-  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
-  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
-  write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
-  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
-  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
-  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
-  sink()
-  }
-
-
-edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, 
-        fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
-        filterquantile=0.2, subjects=c(),TreatmentName="Rx",ControlName="Ctrl",mydesign=NULL,
-        doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
-        histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
-        doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') 
-{
-
-logf = file('Differential.log', open = "a")
-sink(logf,type = c("output", "message"))
-
-
-run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
-{
-  logf = file('edgeR.log', open = "a")
-  sink(logf,type = c("output", "message"))
-  #### Setup myDGEList object
-  myDGEList = DGEList(counts=workCM, group = group)
-  myDGEList = calcNormFactors(myDGEList)
-  if (robust_meth == 'ordinary') {
-       myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
-       myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
-       if (priordf > 0) {  myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) 
-       } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) }
-       comdisp = myDGEList\$common.dispersion
-       estpriorn = getPriorN(myDGEList)
-       print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
-     } else { 
-       myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth)
-          }
-    
-  
-  DGLM = glmFit(myDGEList,design=mydesign)
-  DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
-  normData = cpm(myDGEList)
-  uoutput = cbind( 
-    Name=as.character(rownames(myDGEList\$counts)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
-    myDGEList\$counts
-  )
-  soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable
-  goodness = gof(DGLM, pcutoff=fdrthresh)
-  if (sum(goodness\$outlier) > 0) {
-    print.noquote('GLM outliers:')
-    print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
-    } else { 
-      print('No GLM fit outlier genes found\n')
-    }
-  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
-  pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_'))
-  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
-  abline(0,1,lwd=3)
-  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
-  dev.off()
-  uniqueg = unique(group)
-  write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
-  tt = cbind( 
-    Name=as.character(rownames(myDGEList)),
-    DE\$table,
-    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
-    Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
-  )
-  tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
-  stt = tt[order(DE\$table\$PValue),]
-  print.noquote("@@ edgeR Top tags\n")
-  print.noquote(stt[1:50,])
-  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
-  nsig = length(deTags)
-  print.noquote(paste('@@',nsig,'tags significant at adj p=',fdrthresh))
-  deColours = ifelse(deTags,'red','black')
-  pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
-  plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
-  dev.off()
-  dg = myDGEList[order(DE\$table\$PValue),]
-  outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
-  ocpm = normData[order(DE\$table\$PValue),]
-  ocpm = ocpm[c(1:100),]
-  hmap2(ocpm,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
-  outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
-  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
-  smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
-  qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))
-  topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
-  edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
-  edgeRcounts = rep(0, length(allgenes))
-  edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
-  sink()
-  return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts))
-} ### run_edgeR
-
-
-run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType)
-
- {
-    logf = file("DESeq2.log", open = "a")
-    sink(logf,type = c("output", "message"))
-    # DESeq2
-    require('DESeq2')
-    library('RColorBrewer')
-    if (length(subjects) == 0)
-        {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
-        } else {
-        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
-        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
-        }
-    deSeqDatsizefac = estimateSizeFactors(deSEQds)
-    deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
-    resDESeq = nbinomWaldTest(deSeqDatdisp)
-    rDESeq = as.data.frame(results(resDESeq))
-    rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
-    srDESeq = rDESeq[order(rDESeq\$pvalue),]
-    qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_"))
-    cat("# DESeq top 50\n")
-    print.noquote(srDESeq[1:50,])
-    write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F)
-    topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
-    DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
-    DESeqcounts = rep(0, length(allgenes))
-    DESeqcounts[DESeqcountsindex] = 1
-    pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_'))
-    plotDispEsts(resDESeq)
-    dev.off()
-    ysmall = abs(min(rDESeq\$log2FoldChange))
-    ybig = abs(max(rDESeq\$log2FoldChange))
-    ylimit = min(4,ysmall,ybig)
-    pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_"))
-    plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit))
-    dev.off()
-    rlogres = rlogTransformation(resDESeq)
-    sampledists = dist( t( assay(rlogres) ) )
-    sdmat = as.matrix(sampledists)
-    pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
-    heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
-         col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
-    dev.off()
-    result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
-    if ("try-error" %in% class(result)) {
-         print.noquote('DESeq2 plotPCA failed.')
-         } else {
-         pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
-         #### wtf - print? Seems needed to get this to work
-         print(ppca)
-         dev.off()
-        }
-    sink()
-    return(DESeqcounts)
-  }
-
-
-run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom)
-  {
-    logf = file('VOOM.log', open = "a")
-    sink(logf,type = c("output", "message")) 
-    if (doedgeR == F) {
-        #### Setup myDGEList object
-        myDGEList = DGEList(counts=workCM, group = group)
-        myDGEList = calcNormFactors(myDGEList)
-        myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
-        myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) 
-        myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
-    }
-    pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
-    dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
-    dev.off()
-    # Use limma to fit data
-    fit = lmFit(dat.voomed, mydesign)
-    fit = eBayes(fit)
-    rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
-    qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
-    rownames(rvoom) = rownames(workCM)
-    rvoom = cbind(Contig=rownames(workCM),rvoom,NReads=cmrowsums,URL=contigurls)
-    srvoom = rvoom[order(rvoom\$P.Value),]
-    cat("# VOOM top 50\n")
-    print(srvoom[1:50,])
-    write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
-    # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
-    topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
-    voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
-    voomcounts = rep(0, length(allgenes))
-    voomcounts[voomcountsindex] = 1
-    sink()
-    return(voomcounts)
-    }
-
-
-#### data cleaning and analsis control starts here
-
-
-  # Error handling
-  nugroup = length(unique(group))
-  if (nugroup!=2){
-    print("Number of conditions identified in experiment does not equal 2")
-    q()
-    }
-  require(edgeR)
-  options(width = 512) 
-  mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
-  allN = nrow(Count_Matrix)
-  nscut = round(ncol(Count_Matrix)/2) # half samples
-  colTotmillionreads = colSums(Count_Matrix)/1e6
-  counts.dataframe = as.data.frame(c()) 
-  rawrs = rowSums(Count_Matrix)
-  nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
-  nzN = nrow(nonzerod)
-  nzrs = rowSums(nonzerod)
-  zN = allN - nzN
-  print('@@@ Quantiles for non-zero row counts:',quote=F)
-  print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
-  if (useNDF == T)
-  {
-    gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
-    lo = colSums(Count_Matrix[!gt1rpin3,])
-    workCM = Count_Matrix[gt1rpin3,]
-    cleanrs = rowSums(workCM)
-    cleanN = length(cleanrs)
-    meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
-    print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
-    maint = paste('Filter >=1/million reads in >=',nscut,'samples')
-  }   else {        
-    useme = (nzrs > quantile(nzrs,filterquantile))
-    workCM = nonzerod[useme,]
-    lo = colSums(nonzerod[!useme,])
-    cleanrs = rowSums(workCM)
-    cleanN = length(cleanrs)
-    meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
-    print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
-    maint = paste('Filter below',filterquantile,'quantile')
-  }
-  cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
-  allgenes = rownames(workCM)
-  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp
-  genecards=" 0.8) # is ucsc style string
-  {
-    print("@@ using ucsc substitution for urls")
-    contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes," ")
-  } else {
-    print("@@ using genecards substitution for urls")
-    contigurls = paste0(genecards,allgenes,"\'>",allgenes,"")
-  }
-  print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=','))) 
-  cmrowsums = rowSums(workCM)
-  TName=unique(group)[1]
-  CName=unique(group)[2]
-  if (is.null(mydesign)) {
-    if (length(subjects) == 0) 
-    {
-      mydesign = model.matrix(~group)
-    } 
-    else { 
-      subjf = factor(subjects)
-      mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it
-    }
-  } 
-  print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
-  print.noquote('Using design matrix:')
-  print.noquote(mydesign)
-  normData = cpm(workCM)*1e6
-  colnames(normData) = paste( colnames(workCM),'N',sep="_")
-  print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
-
-  if (doedgeR == T) {
-      eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
-      myDGEList = eres\$myDGEList
-      edgeRcounts = eres\$edgeRcounts
-      #### Plot MDS
-      sample_colors =  match(group,levels(group))
-      sampleTypes = levels(factor(group))
-      print.noquote(sampleTypes)
-      pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
-      plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
-      legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
-      grid(col="blue")
-      dev.off()
-      scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
-      normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale))
-      try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T)
-   }
-  if (doDESeq2 == T) {  DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
-  if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
-
-
-  if (doCamera) {
-  doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
-    outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
-  }
-  counts.dataframe = c()
-  vennmain = 'no venn'
-  if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) {
-    if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) {
-        vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
-        counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, 
-                                       VOOM_limma = voomcounts, row.names = allgenes)
-       } else if ((doDESeq2==T) && (doedgeR==T))  {
-         vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
-         counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
-       } else if ((doVoom==T) && (doedgeR==T)) {
-        vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
-        counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
-       }
-    
-    if (nrow(counts.dataframe > 1)) {
-      counts.venn = vennCounts(counts.dataframe)
-      vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_") 
-      pdf(vennf)
-      vennDiagram(counts.venn,main=vennmain,col="maroon")
-      dev.off()
-    }
-  } #### doDESeq2 or doVoom
-sink()
-}
-#### Done
-]]>
-builtin_gmt = ""
-history_gmt = ""
-history_gmt_name = ""
-out_edgeR = F
-out_DESeq2 = F
-out_Voom = "$out_VOOM"
-edgeR_robust_meth = "ordinary" 
-doDESeq2 = $DESeq2.doDESeq2
-doVoom = $doVoom
-doCamera = F
-doedgeR = $edgeR.doedgeR
-edgeR_priordf = 10
-
-
-#if $doVoom == "T":
-  out_Voom = "$out_VOOM"
-#end if
-
-#if $DESeq2.doDESeq2 == "T":
-  out_DESeq2 = "$out_DESeq2"
-  doDESeq2 = T
-  DESeq_fitType = "$DESeq2.DESeq_fitType"
-#end if
-
-#if $edgeR.doedgeR == "T":
-  out_edgeR = "$out_edgeR"
-  edgeR_priordf = $edgeR.edgeR_priordf  
-  edgeR_robust_meth = "$edgeR.edgeR_robust_method"
-#end if
-
-
-if (sum(c(doedgeR,doVoom,doDESeq2)) == 0)
-{
-write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr())
-quit(save="no",status=2)
-}
-
-Out_Dir = "$html_file.files_path"
-Input =  "$input1"
-TreatmentName = "$treatment_name"
-TreatmentCols = "$Treat_cols"
-ControlName = "$control_name"
-ControlCols= "$Control_cols"
-org = "$input1.dbkey"
-if (org == "") { org = "hg19"}
-fdrtype = "$fdrtype"
-fdrthresh = $fdrthresh
-useNDF = $useNDF
-fQ = $fQ # non-differential centile cutoff
-myTitle = "$title"
-sids = strsplit("$subjectids",',')
-subjects = unlist(sids)
-nsubj = length(subjects)
-TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
-CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 
-cat('Got TCols=')
-cat(TCols)
-cat('; CCols=')
-cat(CCols)
-cat('\n')
-  0 & nsubj != nsamples) {
-options("show.error.messages"=T)
-mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
-   'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
-write(mess, stderr())
-quit(save="no",status=4)
-}
-if (length(subjects) != 0) {subjects = subjects[useCols]}
-Count_Matrix = Count_Matrix[,useCols] ### reorder columns
-rn = rownames(Count_Matrix)
-islib = rn %in% c('librarySize','NotInBedRegions')
-LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
-Count_Matrix = Count_Matrix[subset(rn,! islib),]
-group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             
-group = factor(group, levels=c(ControlName,TreatmentName))
-colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")        
-results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
-                 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
-                 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName,
-                 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
-                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth)
-sessionInfo()
-
-sink()
-]]>
- 
- 
-
-
-**What it does**
-
-Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
-Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
-
-**Input**
-
-Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
-and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the 
-non-negative integer count of reads from one sample overlapping the feature.
-
-The matrix must have a header row uniquely identifying the source samples, and unique row names in 
-the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
-They must be unique and R names or they will be mangled - please read the fine R docs for the rules on identifiers. 
-
-**Specifying comparisons**
-
-This is basically dumbed down for two factors - case vs control.
-
-More complex interfaces are possible but painful at present. 
-Probably need to specify a phenotype file to do this better.
-Work in progress. Send code.
-
-If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
-put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or 
-A list of integers, one for each subject or an empty string if samples are all independent.
-If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
-Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
-
-So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
-eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use
-8,9,1,1,2,2 
-as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
-
-**Methods available**
-
-You can run 3 popular Bioconductor packages available for count data.
-
-edgeR - see edgeR_ for details
-
-VOOM/limma - see limma_VOOM_ for details
-
-DESeq2 - see DESeq2_ for details
-
-and optionally camera in edgeR which works better if MSigDB is installed.
-
-**Outputs**
-
-Some helpful plots and analysis results. Note that most of these are produced using R code 
-suggested by the excellent documentation and vignettes for the Bioconductor
-packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
-
-**Note on Voom**
-
-The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
-
-This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
-
-voom is an acronym for mean-variance modelling at the observational level.
-The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
-Count data almost show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
-This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
-The weights are then used in the linear modelling process to adjust for heteroscedasticity.
-
-In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
-The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
-The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
-Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
-Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
-This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
-
-
-Author(s)
-
-Charity Law and Gordon Smyth
-
-References
-
-Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
-
-Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
-Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Reseach, Melbourne, Australia.
-http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
-
-See Also
-
-A voom case study is given in the edgeR User's Guide.
-
-vooma is a similar function but for microarrays instead of RNA-seq.
-
-
-***old rant on changes to Bioconductor package variable names between versions***
-
-The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) 
-breaking this and all other code that assumed the old name for this variable, 
-between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). 
-This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing 
-to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
-when their old scripts break. This tool currently now works with 2.4.6.
-
-**Note on prior.N**
-
-http://seqanswers.com/forums/showthread.php?t=5591 says:
-
-*prior.n*
-
-The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. 
-You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood 
-in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your 
-tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the 
-common likelihood the weight of one observation.
-
-In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, 
-or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that 
-you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation 
-(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? 
-What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. 
-If you have more samples, then the tagwise dispersion estimates will be more reliable, 
-so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. 
-
-
-From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
-
-Dear Dorota,
-
-The important settings are prior.df and trend.
-
-prior.n and prior.df are related through prior.df = prior.n * residual.df,
-and your experiment has residual.df = 36 - 12 = 24.  So the old setting of
-prior.n=10 is equivalent for your data to prior.df = 240, a very large
-value.  Going the other way, the new setting of prior.df=10 is equivalent
-to prior.n=10/24.
-
-To recover old results with the current software you would use
-
-  estimateTagwiseDisp(object, prior.df=240, trend="none")
-
-To get the new default from old software you would use
-
-  estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
-
-Actually the old trend method is equivalent to trend="loess" in the new
-software. You should use plotBCV(object) to see whether a trend is
-required.
-
-Note you could also use
-
-  prior.n = getPriorN(object, prior.df=10)
-
-to map between prior.df and prior.n.
-
-----
-
-**Attributions**
-
-edgeR - edgeR_ 
-
-VOOM/limma - limma_VOOM_ 
-
-DESeq2 - DESeq2_ for details
-
-See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package.
-
-Galaxy_ (that's what you are using right now!) for gluing everything together 
-
-Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is 
-licensed to you under the LGPL_ like other rgenetics artefacts
-
-.. _LGPL: http://www.gnu.org/copyleft/lesser.html
-.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
-.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
-.. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html
-.. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html
-.. _Galaxy: http://getgalaxy.org
- 
-
- 
-
-
diff -r 0de946608423 -r d6fbd1cf7768 test-data/edgeRtest1out.html
--- a/test-data/edgeRtest1out.html	Tue Jan 06 02:20:55 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,621 +0,0 @@
- 
-         
-          
-        
-
Galaxy Tool "Differential_Counts" run at 28/12/2014 21:02:37
-
DESeq2 images and outputs
-(Click on a thumbnail image to download the corresponding original PDF image)
-
-
- 
-
-  
- 
-
-
DESeq2 log output
-
-
-
-# DESeq top 50
-
-                     Contig     baseMean log2FoldChange     lfcSE       stat        pvalue          padj  NReads                                                                                               URL
-
-Mir192               Mir192 271352.97636       6.965264 0.2150593  32.387646 4.096935e-230 3.818343e-227 2325567               Mir192 
-
-Mir122a             Mir122a  10112.31117      10.312083 0.3292695  31.318061 2.649329e-215 1.234587e-212   90428             Mir122a 
-
-Mir149               Mir149    810.35429      -6.911118 0.2341392 -29.517132 1.735536e-191 5.391733e-189    6164               Mir149 
-
-Mir23a               Mir23a   1289.18043      -3.104086 0.1191688 -26.047815 1.424245e-149 3.318491e-147   10118               Mir23a 
-
-Mir181d             Mir181d    275.22797      -3.581172 0.1778187 -20.139461  3.329373e-90  6.205952e-88    2139             Mir181d 
-
-Mir204               Mir204    347.57397      -7.284200 0.3771119 -19.315751  3.959346e-83  6.150183e-81    2601               Mir204 
-
-Mir23b               Mir23b   2028.55377      -2.065110 0.1085802 -19.019217  1.182361e-80  1.574229e-78   16387               Mir23b 
-
-Mir27a               Mir27a   2788.72629      -3.016676 0.1688167 -17.869539  2.036708e-71  2.372765e-69   21886               Mir27a 
-
-Mir195               Mir195    519.86200      -3.152795 0.1784796 -17.664734  7.838131e-70  8.116820e-68    3962               Mir195 
-
-Mir194-2           Mir194-2    391.65678       5.222911 0.3099275  16.852045  1.013492e-63  9.445744e-62    3570           Mir194-2 
-
-Mir208b             Mir208b   1649.77924     -11.396172 0.6771238 -16.830264  1.464482e-63  1.240816e-61   14756             Mir208b 
-
-Mir10b               Mir10b  27820.40551      -5.071453 0.3044884 -16.655656  2.753110e-62  2.138249e-60  197340               Mir10b 
-
-Mir181c             Mir181c   2765.96510      -3.660964 0.2275711 -16.087120  3.141152e-58  2.251965e-56   23605             Mir181c 
-
-Mir208a             Mir208a    616.76981     -10.356524 0.6559218 -15.789267  3.688391e-56  2.455415e-54    4638             Mir208a 
-
-Mir490               Mir490    220.99790      -8.059660 0.5142876 -15.671504  2.369067e-55  1.471980e-53    1741               Mir490 
-
-Mir203               Mir203    772.92882       1.990849 0.1274099  15.625546  4.877239e-55  2.840992e-53    6739               Mir203 
-
-Mir215               Mir215    152.78082      -3.004380 0.1939090 -15.493765  3.822341e-54  2.095542e-52    1182               Mir215 
-
-Dnm3os               Dnm3os    179.61643      -3.278392 0.2166491 -15.132265  9.922045e-52  5.137415e-50    1401               Dnm3os 
-
-Mir214               Mir214    134.69038      -3.216444 0.2154916 -14.926074  2.230149e-50  1.093947e-48    1048               Mir214 
-
-Mir21                 Mir21  26121.31011       2.963903 0.2008617  14.755939  2.817433e-49  1.312924e-47  229120                 Mir21 
-
-Mir1948             Mir1948    263.89527       7.074045 0.4867226  14.534039  7.374076e-48  3.272685e-46    2404             Mir1948 
-
-Mir27b               Mir27b  76478.05753      -1.904653 0.1312889 -14.507339  1.088626e-47  4.611815e-46  625308               Mir27b 
-
-Rabggtb             Rabggtb   2257.19195       1.988368 0.1401741  14.184987  1.134862e-45  4.598659e-44   19535             Rabggtb 
-
-Mir499               Mir499    712.45950     -10.577061 0.7528467 -14.049423  7.766426e-45  3.015962e-43    6527               Mir499 
-
-Mir101b             Mir101b   6846.19683       3.791681 0.2809666  13.495132  1.670548e-41  6.227801e-40   59019             Mir101b 
-
-Mir132               Mir132    106.46062      -2.797928 0.2083376 -13.429779  4.046171e-41  1.450397e-39     857               Mir132 
-
-Mir143hg           Mir143hg 180217.77425      -2.169143 0.1685614 -12.868566  6.764677e-38  2.335066e-36 1407364           Mir143hg 
-
-Mir143               Mir143 179219.35960      -2.170303 0.1696199 -12.795094  1.746402e-37  5.813025e-36 1399819               Mir143 
-
-Mir155               Mir155     57.66182      -3.788079 0.3056585 -12.393175  2.845516e-35  9.144898e-34     463               Mir155 
-
-Mir322               Mir322    899.53469      -3.126011 0.2622596 -11.919531  9.363380e-33  2.908890e-31    7074               Mir322 
-
-Mir378               Mir378    483.21548      -2.994300 0.2577321 -11.617876  3.343461e-31  1.005195e-29    4075               Mir378 
-
-Mir24-2             Mir24-2    424.48288      -2.712674 0.2361028 -11.489378  1.491830e-30  4.213289e-29    3470             Mir24-2 
-
-Mir3074-2         Mir3074-2    424.48288      -2.712674 0.2361028 -11.489378  1.491830e-30  4.213289e-29    3470         Mir3074-2 
-
-Mir199b             Mir199b     47.84725      -5.294373 0.4644474 -11.399295  4.215163e-30  1.155451e-28     370             Mir199b 
-
-Mir802               Mir802    166.83414       8.816580 0.7782636  11.328527  9.478530e-30  2.523997e-28    1514               Mir802 
-
-Mir125b-2         Mir125b-2    493.08516      -2.919341 0.2631193 -11.095122  1.324798e-28  3.429754e-27    3837         Mir125b-2 
-
-Mir301               Mir301    260.53406      -1.676984 0.1526772 -10.983852  4.570133e-28  1.151179e-26    2119               Mir301 
-
-Snord104           Snord104   3851.90119       2.386573 0.2173857  10.978522  4.847915e-28  1.189015e-26   33458           Snord104 
-
-Mir150               Mir150    553.20599      -2.836881 0.2595088 -10.931734  8.127991e-28  1.942381e-26    4229               Mir150 
-
-Mir148a             Mir148a 118994.46955       2.678852 0.2481801  10.793984  3.675045e-27  8.562855e-26 1002397             Mir148a 
-
-5430416N02Rik 5430416N02Rik     62.15966       3.089960 0.2941123  10.506053  8.101331e-26  1.841571e-24     564 5430416N02Rik 
-
-Mir193               Mir193     45.70861       4.991530 0.4814098  10.368568  3.446495e-25  7.647936e-24     421               Mir193 
-
-Mir3073             Mir3073     98.93199       8.208709 0.7944742  10.332254  5.036321e-25  1.091593e-23     904             Mir3073 
-
-Mir125b-1         Mir125b-1     79.01988      -3.020660 0.2937360 -10.283590  8.355635e-25  1.769875e-23     609         Mir125b-1 
-
-2610203C20Rik 2610203C20Rik     79.17666      -3.023491 0.2948614 -10.253939  1.136165e-24  2.353124e-23     610 2610203C20Rik 
-
-Mir181a-1         Mir181a-1     59.53826      -3.151487 0.3211628  -9.812740  9.923710e-23  2.010630e-21     506         Mir181a-1 
-
-Mir184               Mir184     32.23796      -4.865023 0.4962776  -9.803028  1.092606e-22  2.166615e-21     247               Mir184 
-
-Mir199a-2         Mir199a-2     44.84878      -3.422216 0.3545647  -9.651880  4.826276e-22  9.371019e-21     352         Mir199a-2 
-
-Mir182               Mir182    886.79583       4.919630 0.5101689   9.643140  5.255515e-22  9.996204e-21    7189               Mir182 
-
-Snord91a           Snord91a    168.95251       2.700421 0.2835464   9.523738  1.670595e-21  3.113990e-20    1437           Snord91a 
-
-
- 
-
-
Differential images and outputs
-(Click on a thumbnail image to download the corresponding original PDF image)
-
-
- 
-
-
-
-
Differential log output
-
-
-
-[1] @@@ Quantiles for non-zero row counts:
-
-       0%       10%       20%       30%       40%       50%       60%       70%       80%       90%      100% 
-
-      1.0       1.0       2.0       3.0       4.0       8.0      13.0      24.0      86.6     753.0 2325567.0 
-
-[1] Read 3242 contigs. Removed 1494 with no reads. After filtering at count quantile =0.3, there are 1141 contigs
-
-[1] "@@ using genecards substitution for urls"
-
-[1] @@ Total low count contigs per sample =  1,1,1,1,1,1,1,1
-
-[1] Using samples: liver_X11706Liv_CAAAAG_L003_R1_001_trimmed.fastq_bwa.sam.bam,liver_X11700Liv_ATTCCT_L003_R1_001_trimmed.fastq_bwa.sam.bam,liver_X11698Liv_ACTGAT_L003_R1_001_trimmed.fastq_bwa.sam.bam,liver_X11699Liv_ATGAGC_L003_R1_001_trimmed.fastq_bwa.sam.bam,heart_X11706He_AGTTCC_L001_R1_001_trimmed.fastq_bwa.sam.bam,heart_X11699He_GGCTAC_L001_R1_001_trimmed.fastq_bwa.sam.bam,heart_X11698He_TAGCTT_L001_R1_001_trimmed.fastq_bwa.sam.bam,heart_X11700He_CTTGTA_L001_R1_001_trimmed.fastq_bwa.sam.bam
-
-[1] Using design matrix:
-
-  (Intercept) groupliver
-
-1           1          1
-
-2           1          1
-
-3           1          1
-
-4           1          1
-
-5           1          0
-
-6           1          0
-
-7           1          0
-
-8           1          0
-
-attr(,"assign")
-
-[1] 0 1
-
-attr(,"contrasts")
-
-attr(,"contrasts")$group
-
-[1] contr.treatment
-
-[1] "Raw sample read totals 2443751,1644652,1682104,1806045,1440960,1341813,2888924,1428365"
-
-[1] heart liver
-
-
- 
-
-
Differential log output
-
-
-
-Attaching package: ‘gplots’
-
-The following object is masked from ‘package:stats’:
-
-    lowess
-
-Loading required package: methods
-
-Loading required package: limma
-
-Loading required package: splines
-
-Loading required package: DESeq2
-
-Loading required package: GenomicRanges
-
-Loading required package: BiocGenerics
-
-Loading required package: parallel
-
-Attaching package: ‘BiocGenerics’
-
-The following objects are masked from ‘package:parallel’:
-
-    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB
-
-The following object is masked from ‘package:limma’:
-
-    plotMA
-
-The following object is masked from ‘package:stats’:
-
-    xtabs
-
-The following objects are masked from ‘package:base’:
-
-    anyDuplicated, append, as.data.frame, as.vector, cbind, colnames, duplicated, eval, evalq, Filter, Find, get, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int, rownames, sapply, setdiff, sort, table, tapply, union, unique, unlist
-
-Loading required package: IRanges
-
-Attaching package: ‘IRanges’
-
-The following object is masked from ‘package:gplots’:
-
-    space
-
-Loading required package: XVector
-
-Loading required package: Rcpp
-
-Loading required package: RcppArmadillo
-
-gene-wise dispersion estimates
-
-mean-dispersion relationship
-
-final dispersion estimates
-
-you had estimated gene-wise dispersions, removing these
-
-you had estimated fitted dispersions, removing these
-
-you had estimated gene-wise dispersions, removing these
-
-you had estimated fitted dispersions, removing these
-
-Warning message:
-
-closing unused connection 4 (edgeR.log) 
-
-Warning message:
-
-In sink() : no sink to remove
-
-
- 
-
-
VOOM images and outputs
-(Click on a thumbnail image to download the corresponding original PDF image)
-
-
- 
-
-
-
-
VOOM log output
-
-
-
-# VOOM top 50
-
-                     Contig      logFC    AveExpr          t      P.Value    adj.P.Val         B  NReads                                                                                               URL
-
-Mir192               Mir192   6.689950 14.4417888  50.335160 1.802287e-16 2.056409e-13 27.414844 2325567               Mir192 
-
-Mir208a             Mir208a -10.458438  3.8918506 -29.183545 2.249812e-13 1.283518e-10 19.141041    4638             Mir208a 
-
-Mir3073             Mir3073   8.318578  2.6485638  25.821264 1.102217e-12 4.192097e-10 18.063600     904             Mir3073 
-
-Mir802               Mir802   8.992449  2.9857711  25.195575 1.514327e-12 4.319618e-10 17.906674    1514               Mir802 
-
-Mir208b             Mir208b -12.256447  4.4678897 -22.360114 7.074494e-12 1.614400e-09 16.920424   14756             Mir208b 
-
-Mir499               Mir499 -11.104485  3.8066799 -21.990054 8.769804e-12 1.667724e-09 16.728874    6527               Mir499 
-
-Mir10b               Mir10b  -4.775768 12.4173688 -21.487387 1.180685e-11 1.924516e-09 17.249348  197340               Mir10b 
-
-Mir148a             Mir148a   2.751538 15.4237642  20.289553 2.464883e-11 3.515539e-09 16.455471 1002397             Mir148a 
-
-Mir490               Mir490  -8.497742  3.6613221 -18.336110 8.980482e-11 1.138526e-08 13.923237    1741               Mir490 
-
-Mir122a             Mir122a  10.197963  8.1512374  17.467826 1.663427e-10 1.897970e-08 14.215445   90428             Mir122a 
-
-Mir133b             Mir133b  -6.172367  1.3497975 -17.274094 1.916064e-10 1.987481e-08 13.840201     159             Mir133b 
-
-Mir149               Mir149  -7.041176  6.0886889 -16.861286 2.602547e-10 2.474589e-08 13.714880    6164               Mir149 
-
-Mir101b             Mir101b   3.837883 10.6216725  15.443054 7.873164e-10 6.910215e-08 13.054350   59019             Mir101b 
-
-Mir143               Mir143  -1.912927 16.0353646 -14.922755 1.209475e-09 9.857220e-08 12.374988 1399819               Mir143 
-
-Mir194-2           Mir194-2   5.534694  6.2627211  14.703097 1.455682e-09 1.107289e-07 12.316769    3570           Mir194-2 
-
-Mir23a               Mir23a  -2.905961  8.6431895 -14.558394 1.646894e-09 1.174441e-07 12.339306   10118               Mir23a 
-
-Mir1983             Mir1983  -5.612359  1.1061384 -14.266537 2.119488e-09 1.422551e-07 11.743589     101             Mir1983 
-
-Mir27a               Mir27a  -2.849084 10.0939084 -14.158498 2.329669e-09 1.476752e-07 11.960195   21886               Mir27a 
-
-Cyp3a25             Cyp3a25   6.312461  1.6425308  13.845627 3.074630e-09 1.846396e-07 11.502713     226             Cyp3a25 
-
-Mir200a             Mir200a   6.129125  1.8320913  13.226966 5.410979e-09 3.086963e-07 10.979834     264             Mir200a 
-
-Mir181d             Mir181d  -3.405544  6.3702152 -13.064584 6.300369e-09 3.423201e-07 11.006301    2139             Mir181d 
-
-Mir153               Mir153  -5.698257  1.5328802 -12.832092 7.856705e-09 3.829623e-07 10.583835     140               Mir153 
-
-Mir204               Mir204  -7.718081  4.5031856 -12.808496 8.036265e-09 3.829623e-07 10.353229    2601               Mir204 
-
-Gm5441               Gm5441  -5.716851  1.5430406 -12.806028 8.055298e-09 3.829623e-07 10.562881     142               Gm5441 
-
-Rabggtb             Rabggtb   2.327908  9.9369857  12.760291 8.416902e-09 3.841474e-07 10.654006   19535             Rabggtb 
-
-Mir504               Mir504  -5.122304  0.8161671 -12.391521 1.205304e-08 5.289430e-07 10.144865      69               Mir504 
-
-Mir133a-1         Mir133a-1  -4.912497  0.7297882 -12.335045 1.274466e-08 5.385801e-07 10.076395      60         Mir133a-1 
-
-Mir195               Mir195  -2.954216  7.3530970 -12.081859 1.641098e-08 6.603731e-07 10.055879    3962               Mir195 
-
-Mir27b               Mir27b  -1.496991 14.9464877 -12.059553 1.678424e-08 6.603731e-07  9.674850  625308               Mir27b 
-
-Snord52             Snord52   2.631712  9.7652181  11.922618 1.928407e-08 7.334374e-07  9.811774   18059             Snord52 
-
-Mir322               Mir322  -3.029558  8.1188344 -11.736839 2.333148e-08 8.587488e-07  9.679815    7074               Mir322 
-
-Mir181c             Mir181c  -3.676262  9.6244506 -11.575598 2.758358e-08 9.835271e-07  9.453057   23605             Mir181c 
-
-Mir1948             Mir1948   7.101780  4.7821564  11.471202 3.077353e-08 1.033009e-06  9.233632    2404             Mir1948 
-
-0610031O16Rik 0610031O16Rik   4.519875  0.7388871  11.470939 3.078205e-08 1.033009e-06  9.284014      78 0610031O16Rik 
-
-Mir201               Mir201  -4.964105  0.7490919 -11.289794 3.729283e-08 1.210650e-06  9.114028      63               Mir201 
-
-Mir21                 Mir21   2.746616 13.2835800  11.267331 3.819755e-08 1.210650e-06  8.925217  229120                 Mir21 
-
-Mir184               Mir184  -5.569565  2.3521173 -11.190343 4.148052e-08 1.279170e-06  8.854599     247               Mir184 
-
-1810019D21Rik 1810019D21Rik   5.164581  1.0784751  11.082009 4.662057e-08 1.399844e-06  8.951908     117 1810019D21Rik 
-
-Mir203               Mir203   2.216791  8.5426169  10.866758 5.896538e-08 1.725115e-06  8.715639    6739               Mir203 
-
-1110038B12Rik 1110038B12Rik   2.383720 10.6847245  10.832157 6.125623e-08 1.744094e-06  8.573067   37066 1110038B12Rik 
-
-Snord104           Snord104   2.571210 10.4798167  10.811468 6.267120e-08 1.744094e-06  8.561110   33458           Snord104 
-
-Mir182               Mir182   5.196800  7.2088299  10.640454 7.579543e-08 2.021320e-06  8.545839    7189               Mir182 
-
-Mir547               Mir547  -4.542934  0.5799793 -10.635980 7.617593e-08 2.021320e-06  8.435473      42               Mir547 
-
-Mir143hg           Mir143hg  -2.291921 16.3789153 -10.597275 7.955395e-08 2.062978e-06  7.952584 1407364           Mir143hg 
-
-Scnn1b               Scnn1b  -4.541403  0.5700621 -10.243065 1.190487e-07 3.018546e-06  8.023327      45               Scnn1b 
-
-Mir125b-2         Mir125b-2  -2.896115  7.2737925 -10.091091 1.420082e-07 3.522420e-06  7.876068    3837         Mir125b-2 
-
-Mir1a-1             Mir1a-1  -4.402568  0.4498447  -9.950346 1.675164e-07 4.066729e-06  7.692790      42             Mir1a-1 
-
-Mir378               Mir378  -2.733247  7.2964165  -9.922980 1.730212e-07 4.112858e-06  7.672216    4075               Mir378 
-
-Mir199b             Mir199b  -5.651345  2.8029895  -9.883978 1.812024e-07 4.219426e-06  7.548022     370             Mir199b 
-
-Mir155               Mir155  -4.158272  3.8002361  -9.845490 1.896814e-07 4.328530e-06  7.604112     463               Mir155 
-
-
- 
-
-
edgeR images and outputs
-(Click on a thumbnail image to download the corresponding original PDF image)
-
-
- 
-
- 
-
-
-
-
edgeR log output
-
-
-
-[1] Common Dispersion = 0.228651460998105 CV =  0.478175136323613 getPriorN =  3.33333333333333
-
-[1] "No GLM fit outlier genes found\n"
-
-[1] @@ edgeR Top tags\n
-
-                       Name      logFC    logCPM        LR        PValue   adj.p.value Dispersion totreads                                                                                               URL
-
-Mir208a             Mir208a -11.840751  8.465017 594.16946 3.104543e-131 3.542284e-128 0.05171220     4638             Mir208a 
-
-Mir149               Mir149  -7.008984  8.861767 484.30321 2.473909e-107 1.411365e-104 0.04959937     6164               Mir149 
-
-Mir208b             Mir208b -13.291635  9.905945 417.69758  7.737463e-93  2.942815e-90 0.10508096    14756             Mir208b 
-
-Mir122a             Mir122a  10.514683 12.478088 415.17429  2.740525e-92  7.817349e-90 0.10803882    90428             Mir122a 
-
-Mir204               Mir204  -7.498162  7.634507 341.30678  3.313430e-76  7.561247e-74 0.06907958     2601               Mir204 
-
-Mir499               Mir499 -13.577454  8.700078 325.79199  7.930755e-73  1.508165e-70 0.12042284     6527               Mir499 
-
-Mir490               Mir490  -8.534394  6.991023 303.17184  6.710366e-68  1.093790e-65 0.07949711     1741               Mir490 
-
-Mir192               Mir192   6.953853 17.169364 217.22867  3.638307e-49  5.189135e-47 0.12700995  2325567               Mir192 
-
-Mir802               Mir802  11.440805  6.593380 212.88059  3.231644e-48  4.097007e-46 0.12273671     1514               Mir802 
-
-Mir1948             Mir1948   7.418142  7.252734 195.66958  1.840248e-44  2.099723e-42 0.12060221     2404             Mir1948 
-
-Mir194-2           Mir194-2   5.298950  7.811522 191.85588  1.250960e-43  1.297587e-41 0.08670751     3570           Mir194-2 
-
-Mir23a               Mir23a  -3.153807  9.529402 177.53185  1.676248e-40  1.593833e-38 0.04442763    10118               Mir23a 
-
-Mir181c             Mir181c  -3.767686 10.639598 169.87390  7.883295e-39  6.919107e-37 0.06368883    23605             Mir181c 
-
-Mir3073             Mir3073  10.686337  5.859950 164.86740  9.778593e-38  7.969554e-36 0.14069249      904             Mir3073 
-
-Mir181d             Mir181d  -3.643963  7.300371 162.18591  3.767663e-37  2.865936e-35 0.05729574     2139             Mir181d 
-
-Mir195               Mir195  -3.203683  8.215089 150.20548  1.563314e-34  1.114838e-32 0.05235020     3962               Mir195 
-
-Mir10b               Mir10b  -5.182616 13.946466 147.24793  6.926819e-34  4.649118e-32 0.12268790   197340               Mir10b 
-
-Mir101b             Mir101b   3.759962 11.863187 136.31359  1.703812e-31  1.080028e-29 0.07961343    59019             Mir101b 
-
-Mir378               Mir378  -3.115599  8.119617 126.76408  2.092233e-29  1.256441e-27 0.05942391     4075               Mir378 
-
-Mir27a               Mir27a  -3.064687 10.642480 124.98911  5.117477e-29  2.919520e-27 0.06113852    21886               Mir27a 
-
-Mir182               Mir182   5.057509  8.846381 123.17765  1.275060e-28  6.927826e-27 0.13653707     7189               Mir182 
-
-Mir322               Mir322  -3.194159  9.012888 107.34926  3.732413e-25  1.935765e-23 0.07536483     7074               Mir322 
-
-Mir199b             Mir199b  -5.520119  4.792610 102.10724  5.259607e-24  2.609223e-22 0.13417024      370             Mir199b 
-
-Mir181a-2         Mir181a-2  -3.000177  7.637692 101.38361  7.578821e-24  3.603098e-22 0.06896654     2817         Mir181a-2 
-
-Mir125b-2         Mir125b-2  -2.987759  8.144514  91.72544  9.957640e-22  4.488356e-20 0.07737381     3837         Mir125b-2 
-
-Dnm3os               Dnm3os  -3.331215  6.686950  91.67250  1.022763e-21  4.488356e-20 0.08810497     1401               Dnm3os 
-
-Mir184               Mir184  -5.111350  4.234160  84.35542  4.133639e-20  1.686711e-18 0.13502324      247               Mir184 
-
-Mir215               Mir215  -3.058208  6.447966  84.35278  4.139167e-20  1.686711e-18 0.08138517     1182               Mir215 
-
-Mir133b             Mir133b  -8.383611  3.584760  83.96681  5.031517e-20  1.960318e-18 0.17482280      159             Mir133b 
-
-Mir150               Mir150  -2.883446  8.307765  83.91918  5.154210e-20  1.960318e-18 0.08008123     4229               Mir150 
-
-Mir3074-2         Mir3074-2  -2.778308  7.935651  83.74839  5.619282e-20  2.040616e-18 0.07424646     3470         Mir3074-2 
-
-Mir24-2             Mir24-2  -2.778307  7.935651  83.71222  5.723024e-20  2.040616e-18 0.07427992     3470             Mir24-2 
-
-Mir193               Mir193   5.176579  4.801090  83.19222  7.445011e-20  2.574169e-18 0.14794861      421               Mir193 
-
-Scarna17           Scarna17   2.182159  9.244479  81.91330  1.421894e-19  4.771710e-18 0.04982909     9224           Scarna17 
-
-Mir214               Mir214  -3.271172  6.271755  80.43948  2.997458e-19  9.771712e-18 0.09566584     1048               Mir214 
-
-Snord104           Snord104   2.330488 11.053611  79.50529  4.809369e-19  1.524303e-17 0.05915990    33458           Snord104 
-
-Mir200a             Mir200a   7.201555  4.139422  77.35503  1.428304e-18  4.365755e-17 0.19287764      264             Mir200a 
-
-Mir200b             Mir200b   6.525423  5.752604  77.31985  1.453976e-18  4.365755e-17 0.26237966      888             Mir200b 
-
-Mir21                 Mir21   2.923147 13.825255  75.51798  3.620938e-18  1.059357e-16 0.09395834   229120                 Mir21 
-
-Mir203               Mir203   1.956427  8.767610  75.17870  4.299815e-18  1.226522e-16 0.04381710     6739               Mir203 
-
-Mir155               Mir155  -3.886731  5.068563  73.81316  8.587210e-18  2.389758e-16 0.12522673      463               Mir155 
-
-Cyp3a25             Cyp3a25   8.681501  3.972085  72.29680  1.851471e-17  5.029829e-16 0.23125383      226             Cyp3a25 
-
-Rabggtb             Rabggtb   1.934093 10.298211  72.02043  2.129809e-17  5.651422e-16 0.04596646    19535             Rabggtb 
-
-Mir23b               Mir23b  -2.100584 10.184110  71.44225  2.854935e-17  7.403367e-16 0.05416378    16387               Mir23b 
-
-Snord52             Snord52   2.207491 10.217554  71.27974  3.100027e-17  7.860292e-16 0.05941483    18059             Snord52 
-
-Gm5441               Gm5441  -6.881248  3.538457  70.05615  5.764004e-17  1.429724e-15 0.20097284      142               Gm5441 
-
-Mir153               Mir153  -6.857671  3.517446  69.37600  8.137282e-17  1.975455e-15 0.20158808      140               Mir153 
-
-Mir132               Mir132  -2.858294  5.938312  64.52507  9.531204e-16  2.265647e-14 0.09274248      857               Mir132 
-
-1110038B12Rik 1110038B12Rik   2.195962 11.253090  62.92015  2.152583e-15  5.012443e-14 0.06712174    37066 1110038B12Rik 
-
-Snord91a           Snord91a   2.654072  6.557504  62.40549  2.795431e-15  6.379174e-14 0.08637410     1437           Snord91a 
-
-[1] @@ 416 tags significant at adj p= 0.05
-
-
- 
-
-
Other log output
-
-
-
-## Toolfactory generated command line = Rscript - None None
-
-Got TCols=1 5 6 7; CCols=2 3 4 8
-
-R version 3.0.2 (2013-09-25)
-
-Platform: x86_64-pc-linux-gnu (64-bit)
-
-locale:
-
- [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C               LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8     LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8    LC_PAPER=en_AU.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
-
-attached base packages:
-
-[1] parallel  splines   methods   stats     graphics  grDevices utils     datasets  base     
-
-other attached packages:
-
- [1] RColorBrewer_1.0-5      DESeq2_1.2.10           RcppArmadillo_0.4.400.0 Rcpp_0.11.2             GenomicRanges_1.14.4    XVector_0.2.0           IRanges_1.20.7          BiocGenerics_0.8.0      edgeR_3.4.2             limma_3.18.13           gplots_2.15.0           stringr_0.6.2          
-
-loaded via a namespace (and not attached):
-
- [1] annotate_1.40.1      AnnotationDbi_1.24.0 Biobase_2.22.0       bitops_1.0-6         caTools_1.17.1       DBI_0.3.0            gdata_2.13.3         genefilter_1.44.0    grid_3.0.2           gtools_3.4.1         KernSmooth_2.23-13   lattice_0.20-29      locfit_1.5-9.1       RSQLite_0.11.4       stats4_3.0.2         survival_2.37-7      XML_3.98-1.1         xtable_1.7-4        
-
-
- 
-
-
All output files available for downloading
-
-
-
-    
-         
-    
-         
-    
-         
-
-    
-        
-            
-                
-                    
-                                             
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/survival_2.37-7.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/lars_1.2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/lattice_0.20-29.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/latticeExtra_0.6-26.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/Matrix_1.1-4.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/glmnet_1.9-8.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/KernSmooth_2.23-13.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/numDeriv_2012.9-1.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/lava_1.3.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/prodlim_1.5.1.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/codetools_0.2-9.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/iterators_1.0.7.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/foreach_1.4.2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/Formula_1.1-2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/RColorBrewer_1.1-2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/cluster_1.15.3.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/rpart_4.1-8.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/nnet_7.3-8.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/acepack_1.3-3.3.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/foreign_0.8-61.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/Hmisc_3.14-6.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/SparseM_1.05.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/quantreg_5.05.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/nlme_3.1-118.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/polspline_1.1.9.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/mvtnorm_1.0-2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/TH.data_1.0-5.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/zoo_1.7-11.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/sandwich_2.3-2.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/multcomp_1.3-8.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/rms_4.2-1.tar.gz?raw=true 
-                    https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/pec_2.4.4.tar.gz?raw=true 
-
-                 
-             
-         
-        
-        Differential gene expression analysis
-        You may need libxml2-dev for XML to compile
-        Ubuntu has a bug with libgfortran. To fix that create a symlink like: sudo ln -s /usr/lib/x86_64-linux-gnu/libgfortran.so.3 /usr/lib/x86_64-linux-gnu/libgfortran.so
-         
-     
-