comparison fubar-galaxytoolfactory-8cc3f3c0e539/rgToolFactory.py @ 3:5dbaf051b2e7 draft

Uploaded
author fubar
date Wed, 06 Jun 2012 23:06:32 -0400
parents f82dd90a8c9e
children
comparison
equal deleted inserted replaced
2:6985104392b9 3:5dbaf051b2e7
1 # rgToolFactory.py
2 # https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3
4 # this is a tool factory for simple scripts in python, R or whatever ails ye.
5 # you paste and run your script
6 # Only works for simple scripts that read one input from the history.
7 # Optionally can write one new history dataset,
8 # and optionally collect any number of outputs into links on an autogenerated HTML page.
9
10 # DO NOT install on a public or important site - please.
11
12 # installed generated tools are fine if the script is safe.
13 # They just run normally and their user cannot do anything unusually insecure
14 # but please, practice safe toolshed.
15 # Read the fucking code before you install any tool
16 # especially this one
17
18 # After you get the script working on some test data, you can
19 # optionally generate a toolshed compatible gzip file
20 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
21 # safe and largely automated installation in a production Galaxy.
22
23 # If you opt for an HTML output, you get all the script outputs arranged
24 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
25 # Ugly but really inexpensive.
26 #
27 # Patches appreciated please.
28 #
29 #
30 # long route to June 2012 product
31 # Behold the awesome power of Galaxy and the toolshed, with the tool factory to bind them
32 # derived from an integrated script model
33 # called rgBaseScriptWrapper.py
34 # Note to the unwary:
35 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
36 # There is nothing stopping a malicious user doing whatever they choose
37 # Extremely dangerous!!
38 # Totally insecure. So, trusted users only
39 #
40 # preferred model is a developer using their throw away workstation instance - ie a private site.
41 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
42 #
43
44 #
45 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
46 #
47 # all rights reserved
48 # Licensed under the LGPL. If you want to improve it, feel free: https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
49
50 import sys
51 import shutil
52 import subprocess
53 import os
54 import time
55 import tempfile
56 import optparse
57 import tarfile
58 import re
59 import shutil
60
# Module-level settings shared by the functions and classes below.
progname = os.path.basename(sys.argv[0])  # file name this script was invoked as
myversion = 'V000.1 May 2012'
verbose = False
debug = False
65
66
def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # time.localtime() with no argument converts the current time.
    return time.strftime(stamp_format, time.localtime())
71 # characters that are allowed but need to be escaped
72
class ScriptRunner:
    """Wrap an arbitrary user script so it can be run, reported on and packaged.

    The script text is fed to the chosen interpreter on stdin; the input and
    output dataset paths are passed as the two positional arguments.  Depending
    on the options, the run can also produce an HTML index of everything left
    in the output directory and a gzipped tarball holding a generated Galaxy
    tool (XML wrapper, this wrapper script, the user script and test data).

    ``makeHtml`` relies on the module-level ``progname`` and ``timenow``.
    The code is written to run unchanged on Python 2.6+ and Python 3.
    """

    def __init__(self, opts=None):
        """Clean up the inputs and set up the output artefacts.

        ``opts`` is the parsed option object built in ``main``; the attributes
        read here are ``output_dir``, ``tool_name``, ``script_path``,
        ``interpreter``, ``make_Tool``, ``input_tab`` and ``output_tab``.

        Side effects: changes the working directory to ``opts.output_dir`` when
        one is given and writes a copy of the script there; when
        ``opts.make_Tool`` is set, also writes the script and a copy of this
        program into the working directory for the tarball.

        Raises ValueError when no options object is supplied.
        """
        import shlex  # local import: keeps this block self-contained

        if opts is None:
            raise ValueError('ScriptRunner needs the parsed command line options')
        if opts.output_dir:  # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'jpg'
        self.opts = opts
        # Only letters, digits and underscore survive in the tool name/id.
        self.toolname = re.sub(r'[^a-zA-Z0-9_]+', '', opts.tool_name)
        self.toolid = self.toolname
        with open(opts.script_path, 'r') as source:
            self.script = source.read()
        self.myname = sys.argv[0]  # get our name because we write ourselves out as a tool later
        self.pyfile = self.myname  # crude but efficient - the cruft won't hurt much
        self.xmlfile = '%s.xml' % self.toolname
        self.sfile = '%s.%s' % (self.toolname, opts.interpreter)
        self.tlog = None  # only available when there is an output directory
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(opts.output_dir, "%s_runner.log" % self.toolname)
            artifactpath = os.path.join(opts.output_dir, '%s_run.script' % self.toolname)
            with open(artifactpath, 'w') as artifact:
                artifact.write(self.script)
                artifact.write('\n')
        if opts.make_Tool:  # need this code and the user script for the tarball
            with open(self.sfile, 'w') as localscript:
                localscript.write(self.script)
            shutil.copyfile(self.myname, '%s.py' % self.toolname)  # for tool and for user
        # Argument vector for the run: interpreter (split so that an
        # interpreter given with flags still works without a shell), '-' to
        # read the script from stdin, then the input and output paths.
        self.cl = shlex.split(opts.interpreter) + ['-', opts.input_tab, opts.output_tab]
        self.html = []
        self.outFormats = 'tabular'  # TODO make this an option at tool generation time
        self.inputFormats = 'tabular'  # TODO make this an option at tool generation time
        self.test1Input = 'test1_input.xls'
        self.test1Output = 'test1_output.xls'
        self.test1HTML = 'test1_output.html'

    def makeXML(self):
        """Write a Galaxy XML tool wrapper for the script to ``self.xmlfile``.

        fixme - use templating or something less fugly than string formatting.

        The generated document looks like this (abridged)::

            <tool id="reverse" name="reverse" version="0.01">
            <description>a tabular file</description>
            <command interpreter="python">
            reverse.py --script_path "$runMe" --interpreter "python"
            --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
            </command>
            <inputs> ... </inputs>
            <outputs> ... </outputs>
            <help> ... </help>
            <configfiles>
            <configfile name="runMe">
            ...the user script...
            </configfile>
            </configfiles>
            </tool>

        The script, help text and description are XML-escaped so that
        characters such as ``<`` and ``&`` cannot break the document.
        No ``<tests>`` section is generated.
        """
        from xml.sax.saxutils import escape  # local import: self-contained block

        # Needs a dict with toolid, toolname, tooldesc, command, inputs,
        # outputs, help and script, each ready to write.
        newXML = """<tool id="%(toolid)s" name="%(toolname)s" version="0.01">
%(tooldesc)s
%(command)s
<inputs>
%(inputs)s
</inputs>
<outputs>
%(outputs)s
</outputs>
<help>
%(help)s
</help>
<configfiles>
<configfile name="runMe">
%(script)s
</configfile>
</configfiles>
</tool>"""

        newCommand = """<command interpreter="python">
%(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
--tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
</command>"""  # may NOT be an input or htmlout

        xdict = {}
        # configfile is least painful way to embed script to avoid external dependencies
        xdict['script'] = escape(self.script)
        if self.opts.help_text:
            with open(self.opts.help_text, 'r') as helpfile:
                xdict['help'] = escape(helpfile.read())
        else:
            xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
        if self.opts.tool_desc:
            xdict['tooldesc'] = '<description>%s</description>' % escape(self.opts.tool_desc)
        else:
            xdict['tooldesc'] = ''
        if self.opts.input_tab != 'None':
            xdict['command_inputs'] = '--input_tab "$input1"'
            xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/>' % self.inputFormats
        else:
            xdict['command_inputs'] = ''  # assume no input - eg a random data generator
            xdict['inputs'] = ''
        xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/>\n' % self.toolname
        xdict['toolname'] = self.toolname
        xdict['toolid'] = self.toolid
        xdict['interpreter'] = self.opts.interpreter
        xdict['scriptname'] = self.sfile
        # Collect the output switches separately and join them with a space so
        # that requesting both HTML and tabular output gives valid arguments.
        command_outputs = []
        outputs = []
        if self.opts.make_HTML:
            command_outputs.append('--output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"')
            outputs.append('<data format="html" name="html_file" label="${job_name}.html"/>\n')
        if self.opts.output_tab != 'None':
            command_outputs.append('--output_tab "$tab_file"')
            outputs.append('<data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats)
        xdict['command_outputs'] = ' '.join(command_outputs)
        xdict['outputs'] = ''.join(outputs)
        xdict['command'] = newCommand % xdict
        with open(self.xmlfile, 'w') as xf:
            xf.write(newXML % xdict)
            xf.write('\n')
        # ready for the tarball

    def makeTooltar(self):
        """Run the script once and, if it succeeds, package it as a tool tarball.

        A tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        The tarball is built in the working directory and copied to
        ``self.opts.new_tool``.  Returns the exit code of the run (0 whenever
        this method returns); exits the process with status 1 when the run
        fails.
        """
        retval = self.run()
        if retval:
            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
            sys.exit(1)
        self.makeXML()
        tdir = self.toolname
        os.mkdir(tdir)
        try:
            if self.opts.input_tab != 'None':  # we may have test data?
                testdir = os.path.join(tdir, 'test-data')
                os.mkdir(testdir)  # make tests directory
                shutil.copyfile(self.opts.input_tab, os.path.join(testdir, self.test1Input))
                if self.opts.output_tab != 'None':
                    shutil.copyfile(self.opts.output_tab, os.path.join(testdir, self.test1Output))
                if self.opts.make_HTML:
                    shutil.copyfile(self.opts.output_html, os.path.join(testdir, self.test1HTML))
                if self.opts.output_dir:
                    shutil.copyfile(self.tlog, os.path.join(testdir, 'test1_out.log'))
            shutil.copyfile(self.xmlfile, os.path.join(tdir, self.xmlfile))
            shutil.copyfile(self.pyfile, os.path.join(tdir, '%s.py' % self.toolname))
            shutil.copyfile(self.sfile, os.path.join(tdir, self.sfile))
            tarpath = "%s.gz" % self.toolname
            tar = tarfile.open(tarpath, "w:gz")
            try:
                tar.add(tdir, arcname=self.toolname)
            finally:
                tar.close()
            shutil.copy(tarpath, self.opts.new_tool)
        finally:
            # Always remove the staging directory, even when packaging fails.
            shutil.rmtree(tdir)
        ## TODO: replace with optional direct upload to local toolshed?
        return retval

    def _runLogged(self, cl, sto):
        """Run ``cl`` in the output directory, logging to ``sto``; return its exit code.

        Returns 127 (and notes the reason in ``sto``) when the program cannot
        be started at all.
        """
        try:
            return subprocess.call(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
        except OSError as err:
            sto.write('## Unable to run %s: %s\n' % (cl[0], err))
            return 127

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ``gs`` and make a thumbnail with ``convert``.

        ``inpdf`` must be an absolute path because both programs are run from
        the output directory.  The thumbnail is written next to the PDF with
        the extension ``thumbformat``.

        Returns 0 when both steps succeed, otherwise the first non-zero exit
        code.  Raises AssertionError when ``inpdf`` is not an existing file.
        The temporary log of the two programs is removed on success and kept
        for diagnosis on failure.
        """
        if inpdf is None or not os.path.isfile(inpdf):
            # Raised explicitly so the check also survives ``python -O``.
            raise AssertionError("## Input %s supplied to %s compressPDF not found" % (inpdf, self.myname))
        logfd, hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
        outpdf = '%s_compressed' % inpdf
        thumbnail = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        compress_cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf, inpdf]
        thumb_cl = ['convert', inpdf, thumbnail]
        sto = os.fdopen(logfd, 'w')  # reuse the mkstemp descriptor instead of leaking it
        try:
            retval1 = self._runLogged(compress_cl, sto)
            if retval1 == 0:
                os.unlink(inpdf)
                shutil.move(outpdf, inpdf)
            retval2 = self._runLogged(thumb_cl, sto)
        finally:
            sto.close()
        retval = retval1 or retval2
        if retval == 0:
            os.unlink(hlog)
        return retval

    def getfSize(self, fpath, outpath):
        """Return a display string such as ' (1.5 MB)' for ``fpath`` inside ``outpath``.

        Returns an empty string when the file does not exist or is empty.
        """
        target = os.path.join(outpath, fpath)
        if not os.path.isfile(target):
            return ''
        nbytes = float(os.path.getsize(target))
        if nbytes > 2**20:
            return ' (%1.1f MB)' % (nbytes / 2**20)
        if nbytes > 2**10:
            return ' (%1.1f KB)' % (nbytes / 2**10)
        if nbytes > 0:
            return ' (%d B)' % (int(nbytes))
        return ''

    def makeHtml(self):
        """Write an HTML page listing all the artefacts found in the output_dir.

        Every file except ``Rplots.pdf`` is linked.  PDFs are compressed and,
        when that succeeds, shown as a clickable thumbnail.  The run log and
        the command line follow.  The page is written to
        ``self.opts.output_html`` and its lines are kept in ``self.html``.
        """
        galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
        galhtmlattr = """<hr/><b><a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory Script Wrapper</a> tool output %s run at %s</b><br/>"""
        galhtmlpostfix = """</div></body></html>\n"""
        linkitem = '<li><a href="%s">%s %s</a></li>'

        outdir = self.opts.output_dir
        flist = sorted(x for x in os.listdir(outdir) if x != 'Rplots.pdf')
        html = [galhtmlprefix % progname]
        html.append('<h2>Galaxy %s outputs run at %s</h2><br/>\n' % (self.toolname, timenow()))
        fhtml = []
        if flist:
            html.append('<table cellpadding="3" cellspacing="3">\n')
            for fname in flist:
                dname, ext = os.path.splitext(fname)
                if ext.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(outdir, fname)
                    if self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat) == 0:
                        html.append('<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname, thumb, fname))
                # Measure after any compression so the size matches the linked file.
                fhtml.append(linkitem % (fname, fname, self.getfSize(fname, outdir)))
            html.append('</table>\n')
            if fhtml:
                # add all files as plain links at the end of the display
                html += ['<ul>'] + fhtml + ['</ul><br/>']
        else:
            html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h2>' % self.opts.interpreter)
        html.append('<h3>%s log follows below</h3><hr/><pre><br/>\n' % self.opts.interpreter)
        with open(self.tlog, 'r') as logfile:
            # Drop the line ends: the page is joined with newlines below.
            html += [line.rstrip('\n') for line in logfile]
        html.append('<br/>%s CL = %s<br/>\n' % (self.toolname, ' '.join(sys.argv)))
        html.append('</pre>\n')
        html.append(galhtmlattr % (progname, timenow()))
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Run the script through the interpreter and return its exit code.

        The script text is written to the interpreter's stdin.  When an output
        directory is configured the process runs there with stdout and stderr
        captured in ``self.tlog``; otherwise it inherits this process's
        streams.  The HTML report is generated afterwards when
        ``opts.make_HTML`` is set.

        The command is executed without a shell, so paths containing spaces or
        shell metacharacters are passed through intact.  Returns 127 when the
        interpreter cannot be started.
        """
        payload = self.script
        if not isinstance(payload, bytes):  # the stdin pipe carries bytes
            payload = payload.encode('utf-8')
        sto = None
        if self.opts.output_dir:
            sto = open(self.tlog, 'w')
        try:
            try:
                p = subprocess.Popen(self.cl, stdin=subprocess.PIPE, stdout=sto,
                                     stderr=sto, cwd=self.opts.output_dir)
            except OSError as err:
                report = sto if sto is not None else sys.stderr
                report.write('## Unable to start %s: %s\n' % (self.cl[0], err))
                retval = 127
            else:
                try:
                    p.stdin.write(payload)
                    p.stdin.close()
                except (IOError, OSError):
                    # The interpreter went away before reading the whole
                    # script; its exit code below reports the failure.
                    pass
                retval = p.wait()
        finally:
            if sto is not None:
                sto.close()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
382
383
def main():
    """Parse the command line, then run the script or build a tool from it.

    Exits with status 2 and a message on stderr when the caller is flagged as
    unauthorized (``--bad_user``) or a required option is missing.  Exits with
    the script's own exit code when the run fails, so the job runner sees the
    failure.
    """
    usage = """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>
    """
    op = optparse.OptionParser(usage=usage)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default=None)
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    opts, args = op.parse_args()
    # Explicit checks rather than assert: assert statements are stripped under
    # ``python -O``, which would silently disable the authorization check.
    if opts.bad_user:
        op.error('%s is NOT authorized to use this tool. Please ask your friendly admin to add their ID to admin_users in universe_wsgi.ini' % (opts.bad_user))
    if not opts.tool_name:
        op.error('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        op.error('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not opts.script_path or not os.path.isfile(opts.script_path):
        op.error('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # An already existing directory is fine; anything else is fatal.
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
427
428