# HG changeset patch
# User mvdbeek
# Date 1423313778 18000
# Node ID eb4ec3488f3a413375a1e49d258e18301482b9df
Uploaded
diff -r 000000000000 -r eb4ec3488f3a multiple_overlap_signatures/multiple_overlap_signatures.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiple_overlap_signatures/multiple_overlap_signatures.py Sat Feb 07 07:56:18 2015 -0500
@@ -0,0 +1,728 @@
+# multiple_overlap_signatures/multiple_overlap_signatures.py - a self annotated version of DockerToolFactory.py generated by running DockerToolFactory.py
+# to make a new Galaxy tool called multiple overlap signatures
+# User m.vandenbeek@gmail.com at 07/02/2015 12:43:09
+# DockerToolFactory.py
+# see https://bitbucket.org/mvdbeek/DockerToolFactory
+
+import sys
+import shutil
+import subprocess
+import os
+import time
+import tempfile
+import argparse
+import tarfile
+import re
+import shutil
+import math
+import fileinput
+from os.path import abspath
+
+progname = os.path.split(sys.argv[0])[1]
+myversion = 'V001.1 March 2014'
+verbose = False
+debug = False
+toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
+
+# if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
+# tool xml
+toolhtmldepskel = """
+
+
+
+
+
+
+
+
+ %s
+
+
+"""
+
+protorequirements = """
+ ghostscript
+ graphicsmagick
+ toolfactory/custombuild:%s
+ """
+
+def timenow():
+ """return current time as a string
+ """
+ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+html_escape_table = {
+ "&": "&",
+ ">": ">",
+ "<": "<",
+ "$": "\$"
+ }
+
+def html_escape(text):
+ """Produce entities within text."""
+ return "".join(html_escape_table.get(c,c) for c in text)
+
+def cmd_exists(cmd):
+ return subprocess.call("type " + cmd, shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
+
+def edit_dockerfile(dockerfile):
+ '''we have to change the userid of galaxy inside the container to the id with which the tool is run,
+ otherwise we have a mismatch in the file permissions inside the container'''
+ uid=os.getuid()
+ for line in fileinput.FileInput(dockerfile, inplace=1):
+ sys.stdout.write(re.sub("RUN adduser galaxy.*", "RUN adduser galaxy -u {0}\n".format(uid), line))
+
+def build_docker(dockerfile, docker_client, image_tag='base'):
+ '''Given the path to a dockerfile, and a docker_client, build the image, if it does not
+ exist yet.'''
+ image_id='toolfactory/custombuild:'+image_tag
+ existing_images=", ".join(["".join(d['RepoTags']) for d in docker_client.images()])
+ if image_id in existing_images:
+ print 'docker container exists, skipping build'
+ return image_id
+ print "Building Docker image, using Dockerfile:{0}".format(dockerfile)
+ build_process=docker_client.build(fileobj=open(dockerfile, 'r'), tag=image_id)
+ print "succesfully dispatched docker build process, building now"
+ build_log=[line for line in build_process] #will block until image is built.
+ return image_id
+
+def construct_bind(host_path, container_path=False, binds=None, ro=True):
+ #TODO remove container_path if it's alwyas going to be the same as host_path
+ '''build or extend binds dictionary with container path. binds is used
+ to mount all files using the docker-py client.'''
+ if not binds:
+ binds={}
+ if isinstance(host_path, list):
+ for k,v in enumerate(host_path):
+ if not container_path:
+ container_path=host_path[k]
+ binds[host_path[k]]={'bind':container_path, 'ro':ro}
+ container_path=False #could be more elegant
+ return binds
+ else:
+ if not container_path:
+ container_path=host_path
+ binds[host_path]={'bind':container_path, 'ro':ro}
+ return binds
+
+def switch_to_docker(opts):
+ import docker #need local import, as container does not have docker-py
+ docker_client=docker.Client()
+ toolfactory_path=abspath(sys.argv[0])
+ dockerfile=os.path.dirname(toolfactory_path)+'/Dockerfile'
+ edit_dockerfile(dockerfile)
+ image_id=build_docker(dockerfile, docker_client)
+ binds=construct_bind(host_path=opts.script_path, ro=False)
+ binds=construct_bind(binds=binds, host_path=abspath(opts.output_dir), ro=False)
+ if len(opts.input_tab)>0:
+ binds=construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
+ if not opts.output_tab == 'None':
+ binds=construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
+ if opts.make_HTML:
+ binds=construct_bind(binds=binds, host_path=opts.output_html, ro=False)
+ if opts.make_Tool:
+ binds=construct_bind(binds=binds, host_path=opts.new_tool, ro=False)
+ binds=construct_bind(binds=binds, host_path=opts.help_text, ro=True)
+ binds=construct_bind(binds=binds, host_path=toolfactory_path)
+ volumes=binds.keys()
+ sys.argv=[abspath(opts.output_dir) if sys.argv[i-1]=='--output_dir' else arg for i,arg in enumerate(sys.argv)] ##inject absolute path of working_dir
+ cmd=['python', '-u']+sys.argv+['--dockerized', '1']
+ container=docker_client.create_container(
+ image=image_id,
+ user='galaxy',
+ volumes=volumes,
+ command=cmd
+ )
+ docker_client.start(container=container[u'Id'], binds=binds)
+ docker_client.wait(container=container[u'Id'])
+ logs=docker_client.logs(container=container[u'Id'])
+ print "".join([log for log in logs])
+
+class ScriptRunner:
+ """class is a wrapper for an arbitrary script
+ """
+
+ def __init__(self,opts=None,treatbashSpecial=True, image_tag='base'):
+ """
+ cleanup inputs, setup some outputs
+
+ """
+ self.opts = opts
+ self.useGM = cmd_exists('gm')
+ self.useIM = cmd_exists('convert')
+ self.useGS = cmd_exists('gs')
+ self.temp_warned = False # we want only one warning if $TMP not set
+ self.treatbashSpecial = treatbashSpecial
+ self.image_tag = image_tag
+ os.chdir(abspath(opts.output_dir))
+ self.thumbformat = 'png'
+ self.toolname_sanitized = re.sub('[^a-zA-Z0-9_]+', '_', opts.tool_name) # a sanitizer now does this but..
+ self.toolname = opts.tool_name
+ self.toolid = self.toolname
+ self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
+ self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
+ self.xmlfile = '%s.xml' % self.toolname_sanitized
+ s = open(self.opts.script_path,'r').readlines()
+ s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
+ self.script = '\n'.join(s)
+ fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname_sanitized,suffix=".%s" % (opts.interpreter))
+ tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
+ tscript.write(self.script)
+ tscript.close()
+ self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s]) # for restructured text in help
+ self.escapedScript = '\n'.join([html_escape(x) for x in s])
+ self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname_sanitized)
+ if opts.output_dir: # may not want these complexities
+ self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname_sanitized)
+ art = '%s.%s' % (self.toolname_sanitized,opts.interpreter)
+ artpath = os.path.join(self.opts.output_dir,art) # need full path
+ artifact = open(artpath,'w') # use self.sfile as script source for Popen
+ artifact.write(self.script)
+ artifact.close()
+ self.cl = []
+ self.html = []
+ a = self.cl.append
+ a(opts.interpreter)
+ if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
+ a(self.sfile)
+ else:
+ a('-') # stdin
+ for input in opts.input_tab:
+ a(input)
+ if opts.output_tab == 'None': #If tool generates only HTML, set output name to toolname
+ a(str(self.toolname_sanitized)+'.out')
+ a(opts.output_tab)
+ for param in opts.additional_parameters:
+ param, value=param.split(',')
+ a('--'+param)
+ a(value)
+ #print self.cl
+ self.outFormats = opts.output_format
+ self.inputFormats = [formats for formats in opts.input_formats]
+ self.test1Input = '%s_test1_input.xls' % self.toolname_sanitized
+ self.test1Output = '%s_test1_output.xls' % self.toolname_sanitized
+ self.test1HTML = '%s_test1_output.html' % self.toolname_sanitized
+
+ def makeXML(self):
+ """
+ Create a Galaxy xml tool wrapper for the new script as a string to write out
+ fixme - use templating or something less fugly than this example of what we produce
+
+
+ a tabular file
+
+ reverse.py --script_path "$runMe" --interpreter "python"
+ --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
+
+
+
+
+
+
+
+
+
+
+
+**What it Does**
+
+Reverse the columns in a tabular file
+
+
+
+
+
+# reverse order of columns in a tabular file
+import sys
+inp = sys.argv[1]
+outp = sys.argv[2]
+i = open(inp,'r')
+o = open(outp,'w')
+for row in i:
+ rs = row.rstrip().split('\t')
+ rs.reverse()
+ o.write('\t'.join(rs))
+ o.write('\n')
+i.close()
+o.close()
+
+
+
+
+
+
+ """
+ newXML="""
+%(tooldesc)s
+%(requirements)s
+
+%(command)s
+
+
+%(inputs)s
+
+
+%(outputs)s
+
+
+
+%(script)s
+
+
+
+%(tooltests)s
+
+
+
+%(help)s
+
+
+ """ # needs a dict with toolname, toolname_sanitized, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
+
+ newCommand="""
+ %(toolname_sanitized)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
+ --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s """
+ # may NOT be an input or htmlout - appended later
+ tooltestsTabOnly = """
+
+
+
+
+
+
+
+ """
+ tooltestsHTMLOnly = """
+
+
+
+
+
+
+
+ """
+ tooltestsBoth = """
+
+
+
+
+
+
+
+ """
+ xdict = {}
+ #xdict['requirements'] = ''
+ #if self.opts.make_HTML:
+ xdict['requirements'] = protorequirements % self.image_tag
+ xdict['tool_version'] = self.opts.tool_version
+ xdict['test1Input'] = self.test1Input
+ xdict['test1HTML'] = self.test1HTML
+ xdict['test1Output'] = self.test1Output
+ if self.opts.make_HTML and self.opts.output_tab <> 'None':
+ xdict['tooltests'] = tooltestsBoth % xdict
+ elif self.opts.make_HTML:
+ xdict['tooltests'] = tooltestsHTMLOnly % xdict
+ else:
+ xdict['tooltests'] = tooltestsTabOnly % xdict
+ xdict['script'] = self.escapedScript
+ # configfile is least painful way to embed script to avoid external dependencies
+ # but requires escaping of <, > and $ to avoid Mako parsing
+ if self.opts.help_text:
+ helptext = open(self.opts.help_text,'r').readlines()
+ helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek
+ xdict['help'] = ''.join([x for x in helptext])
+ else:
+ xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
+ coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
+ coda.append('\n')
+ coda.append(self.indentedScript)
+ coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
+ coda.append('See %s for details of that project' % (toolFactoryURL))
+ coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
+ coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
+ xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
+ if self.opts.tool_desc:
+ xdict['tooldesc'] = '%s ' % self.opts.tool_desc
+ else:
+ xdict['tooldesc'] = ''
+ xdict['command_outputs'] = ''
+ xdict['outputs'] = ''
+ if self.opts.input_tab <> 'None':
+ xdict['command_inputs'] = '--input_tab'
+ xdict['inputs']=''
+ for i,input in enumerate(self.inputFormats):
+ xdict['inputs' ]+=' \n'.format(i+1, input)
+ xdict['command_inputs'] += ' $input{0}'.format(i+1)
+ else:
+ xdict['command_inputs'] = '' # assume no input - eg a random data generator
+ xdict['inputs'] = ''
+ # I find setting the job name not very logical. can be changed in workflows anyway. xdict['inputs'] += ' \n' % self.toolname
+ xdict['toolname'] = self.toolname
+ xdict['toolname_sanitized'] = self.toolname_sanitized
+ xdict['toolid'] = self.toolid
+ xdict['interpreter'] = self.opts.interpreter
+ xdict['scriptname'] = self.sfile
+ if self.opts.make_HTML:
+ xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
+ xdict['outputs'] += ' \n'
+ else:
+ xdict['command_outputs'] += ' --output_dir "./"'
+ #print self.opts.output_tab
+ if self.opts.output_tab!="None":
+ xdict['command_outputs'] += ' --output_tab "$tab_file"'
+ xdict['outputs'] += ' \n' % self.outFormats
+ xdict['command'] = newCommand % xdict
+ #print xdict['outputs']
+ xmls = newXML % xdict
+ xf = open(self.xmlfile,'w')
+ xf.write(xmls)
+ xf.write('\n')
+ xf.close()
+ # ready for the tarball
+
+
+ def makeTooltar(self):
+ """
+ a tool is a gz tarball with eg
+ /toolname_sanitized/tool.xml /toolname_sanitized/tool.py /toolname_sanitized/test-data/test1_in.foo ...
+ """
+ retval = self.run()
+ if retval:
+ print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
+ sys.exit(1)
+ tdir = self.toolname_sanitized
+ os.mkdir(tdir)
+ self.makeXML()
+ if self.opts.make_HTML:
+ if self.opts.help_text:
+ hlp = open(self.opts.help_text,'r').read()
+ else:
+ hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
+ if self.opts.include_dependencies:
+ tooldepcontent = toolhtmldepskel % hlp
+ depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w')
+ depf.write(tooldepcontent)
+ depf.write('\n')
+ depf.close()
+ if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
+ testdir = os.path.join(tdir,'test-data')
+ os.mkdir(testdir) # make tests directory
+ for i in self.opts.input_tab:
+ #print i
+ shutil.copyfile(i,os.path.join(testdir,self.test1Input))
+ if not self.opts.output_tab:
+ shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
+ if self.opts.make_HTML:
+ shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
+ if self.opts.output_dir:
+ shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
+ outpif = '%s.py' % self.toolname_sanitized # new name
+ outpiname = os.path.join(tdir,outpif) # path for the tool tarball
+ pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
+ notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
+ notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
+ notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
+ pi=[line.replace('if False:', 'if False:') for line in open(self.pyfile)] #do not run docker in the generated tool
+ notes += pi
+ outpi = open(outpiname,'w')
+ outpi.write(''.join(notes))
+ outpi.write('\n')
+ outpi.close()
+ stname = os.path.join(tdir,self.sfile)
+ if not os.path.exists(stname):
+ shutil.copyfile(self.sfile, stname)
+ xtname = os.path.join(tdir,self.xmlfile)
+ if not os.path.exists(xtname):
+ shutil.copyfile(self.xmlfile,xtname)
+ tarpath = "%s.gz" % self.toolname_sanitized
+ tar = tarfile.open(tarpath, "w:gz")
+ tar.add(tdir,arcname=self.toolname_sanitized)
+ tar.close()
+ shutil.copyfile(tarpath,self.opts.new_tool)
+ shutil.rmtree(tdir)
+ ## TODO: replace with optional direct upload to local toolshed?
+ return retval
+
+
+ def compressPDF(self,inpdf=None,thumbformat='png'):
+ """need absolute path to pdf
+ note that GS gets confoozled if no $TMP or $TEMP
+ so we set it
+ """
+ assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
+ hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
+ sto = open(hlog,'a')
+ our_env = os.environ.copy()
+ our_tmp = our_env.get('TMP',None)
+ if not our_tmp:
+ our_tmp = our_env.get('TEMP',None)
+ if not (our_tmp and os.path.exists(our_tmp)):
+ newtmp = os.path.join(self.opts.output_dir,'tmp')
+ try:
+ os.mkdir(newtmp)
+ except:
+ sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
+ our_env['TEMP'] = newtmp
+ if not self.temp_warned:
+ sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
+ self.temp_warned = True
+ outpdf = '%s_compressed' % inpdf
+ cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
+ x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+ retval1 = x.wait()
+ sto.close()
+ if retval1 == 0:
+ os.unlink(inpdf)
+ shutil.move(outpdf,inpdf)
+ os.unlink(hlog)
+ hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
+ sto = open(hlog,'w')
+ outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+ if self.useGM:
+ cl2 = ['gm', 'convert', inpdf, outpng]
+ else: # assume imagemagick
+ cl2 = ['convert', inpdf, outpng]
+ x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+ retval2 = x.wait()
+ sto.close()
+ if retval2 == 0:
+ os.unlink(hlog)
+ retval = retval1 or retval2
+ return retval
+
+
+ def getfSize(self,fpath,outpath):
+ """
+ format a nice file size string
+ """
+ size = ''
+ fp = os.path.join(outpath,fpath)
+ if os.path.isfile(fp):
+ size = '0 B'
+ n = float(os.path.getsize(fp))
+ if n > 2**20:
+ size = '%1.1f MB' % (n/2**20)
+ elif n > 2**10:
+ size = '%1.1f KB' % (n/2**10)
+ elif n > 0:
+ size = '%d B' % (int(n))
+ return size
+
+ def makeHtml(self):
+ """ Create an HTML file content to list all the artifacts found in the output_dir
+ """
+
+ galhtmlprefix = """
+
+
+
+
+
+
+
+
+ """
+ galhtmlattr = """
"""
+ galhtmlpostfix = """
\n"""
+
+ flist = os.listdir(self.opts.output_dir)
+ flist = [x for x in flist if x <> 'Rplots.pdf']
+ flist.sort()
+ html = []
+ html.append(galhtmlprefix % progname)
+ html.append('Galaxy Tool "%s" run at %s
' % (self.toolname,timenow()))
+ fhtml = []
+ if len(flist) > 0:
+ logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
+ logfiles.sort()
+ logfiles = [x for x in logfiles if abspath(x) <> abspath(self.tlog)]
+ logfiles.append(abspath(self.tlog)) # make it the last one
+ pdflist = []
+ npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
+ for rownum,fname in enumerate(flist):
+ dname,e = os.path.splitext(fname)
+ sfsize = self.getfSize(fname,self.opts.output_dir)
+ if e.lower() == '.pdf' : # compress and make a thumbnail
+ thumb = '%s.%s' % (dname,self.thumbformat)
+ pdff = os.path.join(self.opts.output_dir,fname)
+ retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+ if retval == 0:
+ pdflist.append((fname,thumb))
+ else:
+ pdflist.append((fname,fname))
+ if (rownum+1) % 2 == 0:
+ fhtml.append('%s %s ' % (fname,fname,sfsize))
+ else:
+ fhtml.append('%s %s ' % (fname,fname,sfsize))
+ for logfname in logfiles: # expect at least tlog - if more
+ if abspath(logfname) == abspath(self.tlog): # handled later
+ sectionname = 'All tool run'
+ if (len(logfiles) > 1):
+ sectionname = 'Other'
+ ourpdfs = pdflist
+ else:
+ realname = os.path.basename(logfname)
+ sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
+ ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
+ pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
+ nacross = 1
+ npdf = len(ourpdfs)
+
+ if npdf > 0:
+ nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
+ if int(nacross)**2 != npdf:
+ nacross += 1
+ nacross = int(nacross)
+ width = min(400,int(1200/nacross))
+ html.append('%s images and outputs
' % sectionname)
+ html.append('(Click on a thumbnail image to download the corresponding original PDF image) ')
+ ntogo = nacross # counter for table row padding with empty cells
+ html.append('\n')
+ for i,paths in enumerate(ourpdfs):
+ fname,thumb = paths
+ s= """ \n""" % (fname,thumb,fname,width,fname)
+ if ((i+1) % nacross == 0):
+ s += ' \n'
+ ntogo = 0
+ if i < (npdf - 1): # more to come
+ s += ''
+ ntogo = nacross
+ else:
+ ntogo -= 1
+ html.append(s)
+ if html[-1].strip().endswith(' '):
+ html.append('
\n')
+ else:
+ if ntogo > 0: # pad
+ html.append(' '*ntogo)
+ html.append('\n')
+ logt = open(logfname,'r').readlines()
+ logtext = [x for x in logt if x.strip() > '']
+ html.append('%s log output
' % sectionname)
+ if len(logtext) > 1:
+ html.append('\n\n')
+ html += logtext
+ html.append('\n \n')
+ else:
+ html.append('%s is empty ' % logfname)
+ if len(fhtml) > 0:
+ fhtml.insert(0,'Output File Name (click to view) Size \n')
+ fhtml.append('
')
+ html.append('All output files available for downloading
\n')
+ html += fhtml # add all non-pdf files to the end of the display
+ else:
+ html.append('### Error - %s returned no files - please confirm that parameters are sane
' % self.opts.interpreter)
+ html.append(galhtmlpostfix)
+ htmlf = file(self.opts.output_html,'w')
+ htmlf.write('\n'.join(html))
+ htmlf.write('\n')
+ htmlf.close()
+ self.html = html
+
+
+ def run(self):
+ """
+ scripts must be small enough not to fill the pipe!
+ """
+ if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
+ retval = self.runBash()
+ else:
+ if self.opts.output_dir:
+ ste = open(self.elog,'w')
+ sto = open(self.tlog,'w')
+ sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
+ sto.flush()
+ p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+ else:
+ p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
+ p.stdin.write(self.script)
+ p.stdin.close()
+ retval = p.wait()
+ if self.opts.output_dir:
+ sto.close()
+ ste.close()
+ err = open(self.elog,'r').readlines()
+ if retval <> 0 and err: # problem
+ print >> sys.stderr,err #same problem, need to capture docker stdin/stdout
+ if self.opts.make_HTML:
+ self.makeHtml()
+ return retval
+
+ def runBash(self):
+ """
+ cannot use - for bash so use self.sfile
+ """
+ if self.opts.output_dir:
+ s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
+ sto = open(self.tlog,'w')
+ sto.write(s)
+ sto.flush()
+ p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+ else:
+ p = subprocess.Popen(self.cl,shell=False)
+ retval = p.wait()
+ if self.opts.output_dir:
+ sto.close()
+ if self.opts.make_HTML:
+ self.makeHtml()
+ return retval
+
+
+def main():
+ u = """
+ This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
+ rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
+
+ """
+ op = argparse.ArgumentParser()
+ a = op.add_argument
+ a('--script_path',default=None)
+ a('--tool_name',default=None)
+ a('--interpreter',default=None)
+ a('--output_dir',default='./')
+ a('--output_html',default=None)
+ a('--input_tab',default='None', nargs='*')
+ a('--output_tab',default='None')
+ a('--user_email',default='Unknown')
+ a('--bad_user',default=None)
+ a('--make_Tool',default=None)
+ a('--make_HTML',default=None)
+ a('--help_text',default=None)
+ a('--tool_desc',default=None)
+ a('--new_tool',default=None)
+ a('--tool_version',default=None)
+ a('--include_dependencies',default=None)
+ a('--dockerized',default=0)
+ a('--output_format', default='tabular')
+ a('--input_format', dest='input_formats', action='append', default=[])
+ a('--additional_parameters', dest='additional_parameters', action='append', default=[])
+ opts = op.parse_args()
+ assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user)
+ assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq'
+ assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript'
+ assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
+ if opts.output_dir:
+ try:
+ os.makedirs(opts.output_dir)
+ except:
+ pass
+ if False:
+ switch_to_docker(opts)
+ return
+ r = ScriptRunner(opts)
+ if opts.make_Tool:
+ retcode = r.makeTooltar()
+ else:
+ retcode = r.run()
+ os.unlink(r.sfile)
+ if retcode:
+ sys.exit(retcode) # indicate failure to job runner
+
+
+if __name__ == "__main__":
+ main()
+
+
+
diff -r 000000000000 -r eb4ec3488f3a multiple_overlap_signatures/multiple_overlap_signatures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiple_overlap_signatures/multiple_overlap_signatures.xml Sat Feb 07 07:56:18 2015 -0500
@@ -0,0 +1,120 @@
+
+plot multiple overlap signatures
+
+ ghostscript
+ graphicsmagick
+ toolfactory/custombuild:base
+
+
+
+ multiple_overlap_signatures.py --script_path "$runMe" --interpreter "Rscript"
+ --tool_name "multiple overlap signatures" --input_tab $input1 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
+
+
+
+
+
+
+
+
+
+
+
+library(ggplot2)
+library(reshape)
+library(plyr)
+
+ourargs = commandArgs(TRUE)
+inf = ourargs[1]
+
+df=read.delim(inf, header=T)
+pdf('pairs_signature.pdf', height=10)
+
+g= ggplot(aes(x=overlap, y=num.of.pairs), data=df)
+g + geom_line() + facet_wrap(~sample, ncol=2) + theme_bw() + ylab("Number of pairs") +
+ xlab("Overlap between pairs") + geom_vline(xintercept=10, colour="red", linetype = "longdash") + scale_x_continuous(breaks=seq(5,15))
+
+dev.off()
+samples=levels(df\$sample)
+sample_mean=sapply(samples, function(x) mean(df[df\$sample==x,]\$num.of.pairs))
+sample_sd=sapply(samples, function(x) sd(df[df\$sample==x,]\$num.of.pairs))
+sample_z=sapply(names(sample_mean), function(x) (df[df\$sample==x,]\$num.of.pairs-sample_mean[x])/sample_sd[x])
+
+z=melt(sample_z)
+colnames(z)=c("overlap", "sample", "z_score")
+z\$overlap=z\$overlap+min(df\$overlap)-1
+#df\$z_score=z\$value
+df=join(df, z)
+
+pdf('z-signature.pdf', height=10)
+g= ggplot(aes(x=overlap, y=z_score), data=df)
+g + geom_line() + facet_wrap(~sample, ncol=2) + theme_bw() + ylab("Z score") +
+ xlab("Overlap between pairs") + geom_vline(xintercept=10, colour="red", linetype = "longdash") + scale_x_continuous(breaks=seq(5,15))
+dev.off()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it Does**
+Plots the piRNA signature for a tabular file with overlap, number of pairs, probability of overlap for a pair and library name.
+This can be produced using the Small RNA Signatures tool (https://testtoolshed.g2.bx.psu.edu/view/drosofff/msp_sr_signature).
+
+**Script**
+Pressing execute will run the following code over your input file and generate some outputs in your history::
+
+
+ library(ggplot2)
+ library(reshape)
+ library(plyr)
+
+ ourargs = commandArgs(TRUE)
+ inf = ourargs[1]
+
+ df=read.delim(inf, header=T)
+ pdf('pairs_signature.pdf', height=10)
+
+ g= ggplot(aes(x=overlap, y=num.of.pairs), data=df)
+ g + geom_line() + facet_wrap(~sample, ncol=2) + theme_bw() + ylab("Number of pairs") +
+ xlab("Overlap between pairs") + geom_vline(xintercept=10, colour="red", linetype = "longdash") + scale_x_continuous(breaks=seq(5,15))
+
+ dev.off()
+ samples=levels(df\$sample)
+ sample_mean=sapply(samples, function(x) mean(df[df\$sample==x,]\$num.of.pairs))
+ sample_sd=sapply(samples, function(x) sd(df[df\$sample==x,]\$num.of.pairs))
+ sample_z=sapply(names(sample_mean), function(x) (df[df\$sample==x,]\$num.of.pairs-sample_mean[x])/sample_sd[x])
+
+ z=melt(sample_z)
+ colnames(z)=c("overlap", "sample", "z_score")
+ z\$overlap=z\$overlap+min(df\$overlap)-1
+ #df\$z_score=z\$value
+ df=join(df, z)
+
+ pdf('z-signature.pdf', height=10)
+ g= ggplot(aes(x=overlap, y=z_score), data=df)
+ g + geom_line() + facet_wrap(~sample, ncol=2) + theme_bw() + ylab("Z score") +
+ xlab("Overlap between pairs") + geom_vline(xintercept=10, colour="red", linetype = "longdash") + scale_x_continuous(breaks=seq(5,15))
+ dev.off()
+
+**Attribution**
+This Galaxy tool was created by m.vandenbeek@gmail.com at 07/02/2015 12:43:09
+using the Galaxy Tool Factory.
+
+See https://bitbucket.org/fubar/galaxytoolfactory for details of that project
+Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team.
+Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573
+
+
+
+
diff -r 000000000000 -r eb4ec3488f3a multiple_overlap_signatures/test-data/multiple_overlap_signatures_test1_input.xls
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiple_overlap_signatures/test-data/multiple_overlap_signatures_test1_input.xls Sat Feb 07 07:56:18 2015 -0500
@@ -0,0 +1,155 @@
+overlap num of pairs probability sample
+5 0 0.000000 GKG16
+6 2 0.000019 GKG16
+7 1 0.000011 GKG16
+8 0 0.000000 GKG16
+9 0 0.000000 GKG16
+10 1 0.000006 GKG16
+11 1 0.000008 GKG16
+12 2 0.000010 GKG16
+13 3 0.000031 GKG16
+14 3 0.000132 GKG16
+15 1 0.000009 GKG16
+5 0 0.000000 GKG18
+6 0 0.000000 GKG18
+7 0 0.000000 GKG18
+8 0 0.000000 GKG18
+9 0 0.000000 GKG18
+10 0 0.000000 GKG18
+11 0 0.000000 GKG18
+12 0 0.000000 GKG18
+13 1 0.000017 GKG18
+14 0 0.000000 GKG18
+15 1 0.000017 GKG18
+5 1 0.000011 GKG47
+6 1 0.000011 GKG47
+7 1 0.000011 GKG47
+8 1 0.000011 GKG47
+9 1 0.000018 GKG47
+10 0 0.000000 GKG47
+11 0 0.000000 GKG47
+12 0 0.000000 GKG47
+13 0 0.000000 GKG47
+14 1 0.000014 GKG47
+15 3 0.000052 GKG47
+5 3 0.000043 GKG52
+6 0 0.000000 GKG52
+7 1 0.000084 GKG52
+8 2 0.000022 GKG52
+9 1 0.000008 GKG52
+10 0 0.000000 GKG52
+11 0 0.000000 GKG52
+12 3 0.000045 GKG52
+13 2 0.000026 GKG52
+14 0 0.000000 GKG52
+15 3 0.000033 GKG52
+5 1 0.000012 GNE4
+6 1 0.000030 GNE4
+7 1 0.000010 GNE4
+8 1 0.000010 GNE4
+9 1 0.000010 GNE4
+10 1 0.000010 GNE4
+11 1 0.000059 GNE4
+12 0 0.000000 GNE4
+13 0 0.000000 GNE4
+14 0 0.000000 GNE4
+15 0 0.000000 GNE4
+5 0 0.000000 GKG63
+6 0 0.000000 GKG63
+7 0 0.000000 GKG63
+8 0 0.000000 GKG63
+9 0 0.000000 GKG63
+10 0 0.000000 GKG63
+11 0 0.000000 GKG63
+12 0 0.000000 GKG63
+13 0 0.000000 GKG63
+14 0 0.000000 GKG63
+15 0 0.000000 GKG63
+5 0 0.000000 GKG66
+6 0 0.000000 GKG66
+7 0 0.000000 GKG66
+8 0 0.000000 GKG66
+9 0 0.000000 GKG66
+10 0 0.000000 GKG66
+11 0 0.000000 GKG66
+12 0 0.000000 GKG66
+13 0 0.000000 GKG66
+14 0 0.000000 GKG66
+15 0 0.000000 GKG66
+5 7 0.000053 GKG16 with testes reads
+6 6 0.000028 GKG16 with testes reads
+7 5 0.000028 GKG16 with testes reads
+8 4 0.000022 GKG16 with testes reads
+9 7 0.000096 GKG16 with testes reads
+10 19 0.000284 GKG16 with testes reads
+11 3 0.000016 GKG16 with testes reads
+12 8 0.000054 GKG16 with testes reads
+13 0 0.000000 GKG16 with testes reads
+14 7 0.000036 GKG16 with testes reads
+15 7 0.000048 GKG16 with testes reads
+5 2 0.000030 GKG18 with testes reads
+6 2 0.000026 GKG18 with testes reads
+7 2 0.000024 GKG18 with testes reads
+8 6 0.000087 GKG18 with testes reads
+9 1 0.000011 GKG18 with testes reads
+10 25 0.000468 GKG18 with testes reads
+11 2 0.000026 GKG18 with testes reads
+12 1 0.000009 GKG18 with testes reads
+13 3 0.000063 GKG18 with testes reads
+14 1 0.000015 GKG18 with testes reads
+15 2 0.000026 GKG18 with testes reads
+5 4 0.000054 GKG47 with testes reads
+6 2 0.000026 GKG47 with testes reads
+7 4 0.000047 GKG47 with testes reads
+8 4 0.000053 GKG47 with testes reads
+9 4 0.000055 GKG47 with testes reads
+10 13 0.000358 GKG47 with testes reads
+11 5 0.000079 GKG47 with testes reads
+12 3 0.000039 GKG47 with testes reads
+13 2 0.000022 GKG47 with testes reads
+14 4 0.000052 GKG47 with testes reads
+15 6 0.000085 GKG47 with testes reads
+5 5 0.000055 GKG52 with testes reads
+6 1 0.000010 GKG52 with testes reads
+7 1 0.000083 GKG52 with testes reads
+8 0 0.000000 GKG52 with testes reads
+9 1 0.000013 GKG52 with testes reads
+10 15 0.000266 GKG52 with testes reads
+11 1 0.000013 GKG52 with testes reads
+12 8 0.000093 GKG52 with testes reads
+13 4 0.000049 GKG52 with testes reads
+14 2 0.000022 GKG52 with testes reads
+15 4 0.000050 GKG52 with testes reads
+5 1 0.000017 GNE4 with testes reads
+6 0 0.000000 GNE4 with testes reads
+7 0 0.000000 GNE4 with testes reads
+8 1 0.000017 GNE4 with testes reads
+9 5 0.000108 GNE4 with testes reads
+10 22 0.000422 GNE4 with testes reads
+11 2 0.000048 GNE4 with testes reads
+12 3 0.000045 GNE4 with testes reads
+13 3 0.000044 GNE4 with testes reads
+14 4 0.000049 GNE4 with testes reads
+15 10 0.000174 GNE4 with testes reads
+5 3 0.000091 GKG63 with testes reads
+6 1 0.000030 GKG63 with testes reads
+7 3 0.000091 GKG63 with testes reads
+8 4 0.000121 GKG63 with testes reads
+9 3 0.000083 GKG63 with testes reads
+10 18 0.000596 GKG63 with testes reads
+11 0 0.000000 GKG63 with testes reads
+12 1 0.000030 GKG63 with testes reads
+13 0 0.000000 GKG63 with testes reads
+14 2 0.000060 GKG63 with testes reads
+15 3 0.000106 GKG63 with testes reads
+5 2 0.000037 GKG66 with testes reads
+6 2 0.000032 GKG66 with testes reads
+7 5 0.000075 GKG66 with testes reads
+8 2 0.000024 GKG66 with testes reads
+9 6 0.000152 GKG66 with testes reads
+10 22 0.000595 GKG66 with testes reads
+11 1 0.000008 GKG66 with testes reads
+12 8 0.000146 GKG66 with testes reads
+13 3 0.000075 GKG66 with testes reads
+14 4 0.000088 GKG66 with testes reads
+15 5 0.000124 GKG66 with testes reads
diff -r 000000000000 -r eb4ec3488f3a multiple_overlap_signatures/test-data/multiple_overlap_signatures_test1_output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiple_overlap_signatures/test-data/multiple_overlap_signatures_test1_output.html Sat Feb 07 07:56:18 2015 -0500
@@ -0,0 +1,70 @@
+
+
+
+
+
+
+
+
+
+
+
Galaxy Tool "multiple overlap signatures" run at 07/02/2015 12:43:09
+
multiple log output
+
+
+
+Loading required package: methods
+
+Attaching package: 'plyr'
+
+The following objects are masked from 'package:reshape':
+
+ rename, round_any
+
+Joining by: overlap, sample
+
+
+
+
+
Other images and outputs
+(Click on a thumbnail image to download the corresponding original PDF image)
+
+
+
+
+
+
+
+
+
+
Other log output
+
+
+
+## Toolfactory generated command line = Rscript - /home/galaxy/galaxy-dist/database/files/008/dataset_8175.dat multiple_overlap_signatures.out None
+
+null device
+
+ 1
+
+null device
+
+ 1
+
+
+
+
+
All output files available for downloading
+
+
+
+
diff -r 000000000000 -r eb4ec3488f3a multiple_overlap_signatures/test-data/test1_out.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multiple_overlap_signatures/test-data/test1_out.log Sat Feb 07 07:56:18 2015 -0500
@@ -0,0 +1,5 @@
+## Toolfactory generated command line = Rscript - /home/galaxy/galaxy-dist/database/files/008/dataset_8175.dat multiple_overlap_signatures.out None
+null device
+ 1
+null device
+ 1