comparison fubar-galaxytoolfactory-ed4aea83f5d6/rgToolFactory.py @ 13:b98e53651bf7 draft

Fixes to outputs - MUST turn off html sanitizing in universe_wsgi.ini sanitize_all_html = False
author fubar
date Thu, 28 Jun 2012 22:36:30 -0400
parents
children
comparison
equal deleted inserted replaced
12:67920e18fc14 13:b98e53651bf7
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
11 # It also serves as the wrapper for the new tool.
12 #
13 # you paste and run your script
14 # Only works for simple scripts that read one input from the history.
15 # Optionally can write one new history dataset,
16 # and optionally collect any number of outputs into links on an autogenerated HTML page.
17
18 # DO NOT install on a public or important site - please.
19
20 # installed generated tools are fine if the script is safe.
21 # They just run normally and their user cannot do anything unusually insecure
22 # but please, practice safe toolshed.
23 # Read the fucking code before you install any tool
24 # especially this one
25
26 # After you get the script working on some test data, you can
27 # optionally generate a toolshed compatible gzip file
28 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
29 # safe and largely automated installation in a production Galaxy.
30
31 # If you opt for an HTML output, you get all the script outputs arranged
32 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
33 # Ugly but really inexpensive.
34 #
35 # Patches appreciated please.
36 #
37 #
38 # long route to June 2012 product
39 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
40 # derived from an integrated script model
41 # called rgBaseScriptWrapper.py
42 # Note to the unwary:
43 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
44 # There is nothing stopping a malicious user doing whatever they choose
45 # Extremely dangerous!!
46 # Totally insecure. So, trusted users only
47 #
48 # preferred model is a developer using their throw away workstation instance - ie a private site.
49 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
50 #
51
52 import sys
53 import shutil
54 import subprocess
55 import os
56 import time
57 import tempfile
58 import optparse
59 import tarfile
60 import re
61 import shutil
62
# Basename of the running script; makeHtml stamps it into the report header.
progname = os.path.basename(sys.argv[0])
# Human-readable release tag for this wrapper.
myversion = 'V000.2 June 2012'
# Diagnostic switches; both are off by default.
verbose = False
debug = False
# Project home page, quoted in the help text of every generated tool.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
68
def timenow():
    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() without an argument uses the current time
    right_now = time.localtime()
    return time.strftime(stamp_format, right_now)
73
74
class ScriptRunner:
    """Wrapper that runs an arbitrary user script and can package it as a Galaxy tool.

    The script text is piped to the chosen interpreter on stdin with the
    input and output paths as positional arguments.  Optionally the outputs
    found in ``output_dir`` are summarised on an HTML page and the script is
    bundled with a generated tool XML into a gzipped tarball.
    """

    def __init__(self, opts=None):
        """Clean up the inputs and set up the output names.

        opts is the optparse result built by main(); it must provide
        script_path, tool_name, interpreter, output_dir, make_Tool,
        input_tab and output_tab.
        """
        # Resolve our own path and read the script BEFORE any chdir so that
        # relative paths still refer to the directory we were started from.
        self.myname = os.path.abspath(sys.argv[0])  # we write ourselves out as a tool later
        self.pyfile = self.myname  # crude but efficient - the cruft won't hurt much
        with open(opts.script_path, 'r') as scriptf:
            s = scriptf.readlines()
        if opts.output_dir:  # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'jpg'
        self.opts = opts
        # a sanitizer now does this but..
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)
        self.toolid = self.toolname
        self.script = ''.join(s)
        self.indentedScript = ''.join([' %s' % x for x in s])  # for restructured text in help
        self.xmlfile = '%s.xml' % self.toolname
        self.sfile = '%s.%s' % (self.toolname, opts.interpreter)
        self.tlog = None  # only a real path when an output directory was requested
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(opts.output_dir, "%s_runner.log" % self.toolname)
            artifactpath = os.path.join(opts.output_dir, '%s_run.script' % self.toolname)
            with open(artifactpath, 'w') as artifact:
                artifact.write(self.script)
                artifact.write('\n')
        if opts.make_Tool:  # need this code and the user script for the tarball
            with open(self.sfile, 'w') as localscript:
                localscript.write(self.script)
        # command line: <interpreter> - <input> <output>; '-' means read the script from stdin
        self.cl = [opts.interpreter, '-', opts.input_tab, opts.output_tab]
        self.html = []
        self.outFormats = 'tabular'  # TODO make this an option at tool generation time
        self.inputFormats = 'tabular'  # TODO make this an option at tool generation time
        self.test1Input = '%s_test1_input.xls' % self.toolname
        self.test1Output = '%s_test1_output.xls' % self.toolname
        self.test1HTML = '%s_test1_output.html' % self.toolname

    def makeXML(self):
        """Write a Galaxy tool XML wrapper for the script to self.xmlfile.

        fixme - use templating or something less fugly than string pasting.
        The generated file looks like this (abridged):

        <tool id="reverse" name="reverse" version="0.01">
        <description>a tabular file</description>
        <command interpreter="python">
        reverse.py --script_path "$runMe" --interpreter "python"
        --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
        </command>
        <inputs> ... </inputs>
        <outputs> ... </outputs>
        <configfiles>
        <configfile name="runMe">
        ... the user script ...
        </configfile>
        </configfiles>
        ... one functional test ...
        <help> ... </help>
        </tool>

        The script text is XML-escaped wherever it is embedded, so characters
        such as '<' or '&' in the script no longer produce a malformed file.
        """
        # function-scope import keeps this method self-contained
        from xml.sax.saxutils import escape as xml_escape

        # needs a dict with toolname, toolid, tool_version, tooldesc, command,
        # inputs, outputs, script, tooltests and help - each ready to write
        newXML = """<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
%(tooldesc)s
%(command)s
<inputs>
%(inputs)s
</inputs>
<outputs>
%(outputs)s
</outputs>
<configfiles>
<configfile name="runMe">
%(script)s
</configfile>
</configfiles>
%(tooltests)s
<help>
%(help)s
</help>
</tool>"""

        # may NOT be an input or htmlout
        newCommand = """<command interpreter="python">
%(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
--tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
</command>"""

        opts = self.opts
        has_input = opts.input_tab != 'None'
        has_tab_output = opts.output_tab != 'None'

        xdict = {}
        xdict['tool_version'] = opts.tool_version
        xdict['test1Input'] = self.test1Input
        xdict['test1HTML'] = self.test1HTML
        xdict['test1Output'] = self.test1Output

        # One functional test: the three parameter rows are always present,
        # only the expected output rows depend on what the tool produces.
        rows = [
            '<param name="input1" value="%s" ftype="tabular"/>' % self.test1Input,
            '<param name="job_name" value="test1"/>',
            '<param name="runMe" value="$runMe"/>',
        ]
        if opts.make_HTML and has_tab_output:
            rows.append('<output name="tab_file" file="%s" ftype="tabular" />' % self.test1Output)
            rows.append('<output name="html_file" file="%s" ftype="html" lines_diff="10"/>' % self.test1HTML)
        elif opts.make_HTML:
            rows.append('<output name="html_file" file="%s" ftype="html" lines_diff="5"/>' % self.test1HTML)
        else:
            rows.append('<output name="tab_file" file="%s" ftype="tabular"/>' % self.test1Output)
        inner_tag = 'test'
        xdict['tooltests'] = '<tests><%s>\n%s\n</%s></tests>' % (inner_tag, '\n'.join(rows), inner_tag)

        # configfile is least painful way to embed script to avoid external dependencies
        xdict['script'] = xml_escape(self.script)

        if opts.help_text:
            with open(opts.help_text, 'r') as helpf:
                helptext = helpf.read()
        else:
            helptext = 'Please ask the tool author for help as none was supplied at tool generation'
        # NOTE(review): the user supplied help text and description are written
        # as given; they must already be safe to place inside XML.
        coda = ['**Script**', 'Pressing execute will run the following code over your input file and generate some outputs in your history::']
        coda.append(xml_escape(self.indentedScript))
        coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy ToolFactory.' % (opts.user_email, timenow()))
        coda.append('See %s for details of that project' % (toolFactoryURL))
        xdict['help'] = '%s\n%s' % (helptext, '\n'.join(coda))

        if opts.tool_desc:
            xdict['tooldesc'] = '<description>%s</description>' % opts.tool_desc
        else:
            xdict['tooldesc'] = ''

        xdict['command_outputs'] = ''
        xdict['outputs'] = ''
        if has_input:
            xdict['command_inputs'] = '--input_tab "$input1" '  # the space may matter a lot if we append something
            xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
        else:
            xdict['command_inputs'] = ''  # assume no input - eg a random data generator
            xdict['inputs'] = ''
        xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
        xdict['toolname'] = self.toolname
        xdict['toolid'] = self.toolid
        xdict['interpreter'] = opts.interpreter
        xdict['scriptname'] = self.sfile
        if opts.make_HTML:
            xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
            xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
        if has_tab_output:
            xdict['command_outputs'] += ' --output_tab "$tab_file"'
            xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
        xdict['command'] = newCommand % xdict
        xmls = newXML % xdict
        with open(self.xmlfile, 'w') as xf:
            xf.write(xmls)
            xf.write('\n')
        # ready for the tarball

    def makeTooltar(self):
        """Run the script once, then bundle it as a gzipped tool tarball.

        A tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        The finished archive is copied to opts.new_tool.  Exits the process
        with status 1 when the trial run fails; otherwise returns the run's
        exit status.
        """
        retval = self.run()
        if retval:
            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
            sys.exit(1)
        self.makeXML()
        tdir = self.toolname
        os.mkdir(tdir)
        if self.opts.input_tab != 'None':  # no reproducible test otherwise? TODO: maybe..
            testdir = os.path.join(tdir, 'test-data')
            os.mkdir(testdir)  # make tests directory
            shutil.copyfile(self.opts.input_tab, os.path.join(testdir, self.test1Input))
            if self.opts.output_tab != 'None':
                shutil.copyfile(self.opts.output_tab, os.path.join(testdir, self.test1Output))
            if self.opts.make_HTML:
                shutil.copyfile(self.opts.output_html, os.path.join(testdir, self.test1HTML))
            if self.opts.output_dir:
                shutil.copyfile(self.tlog, os.path.join(testdir, 'test1_out.log'))
        op = '%s.py' % self.toolname  # new name
        outpiname = os.path.join(tdir, op)  # path for the tool tarball
        pyin = os.path.basename(self.pyfile)  # our name - we rewrite ourselves (TM)
        notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op, pyin, pyin), ]
        notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
        notes.append('# User %s at %s\n' % (self.opts.user_email, timenow()))
        # our code becomes new tool wrapper (!) - first Galaxy worm
        with open(self.pyfile, 'r') as selfsrc:
            notes += selfsrc.readlines()
        with open(outpiname, 'w') as outpi:
            outpi.write(''.join(notes))
            outpi.write('\n')
        shutil.copyfile(self.sfile, os.path.join(tdir, self.sfile))
        shutil.copyfile(self.xmlfile, os.path.join(tdir, self.xmlfile))
        tarpath = "%s.gz" % self.toolname
        tar = tarfile.open(tarpath, "w:gz")
        try:
            tar.add(tdir, arcname=self.toolname)
        finally:
            tar.close()
        shutil.copyfile(tarpath, self.opts.new_tool)
        shutil.rmtree(tdir)
        ## TODO: replace with optional direct upload to local toolshed?
        return retval

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Recompress a PDF in place with ghostscript and render a thumbnail.

        inpdf must be an absolute path to an existing pdf.  The thumbnail is
        written next to it with the extension thumbformat using the
        ImageMagick ``convert`` command.  Returns 0 when both external
        commands succeed, otherwise the first non-zero exit status.
        """
        assert inpdf and os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf, self.myname)
        # mkstemp hands back an open descriptor - wrap it rather than leaking
        # it and opening the same file a second time
        hf, hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
        sto = os.fdopen(hf, 'w')
        try:
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf, inpdf]
            x = subprocess.Popen(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
            retval1 = x.wait()
            if retval1 == 0:
                os.unlink(inpdf)
                shutil.move(outpdf, inpdf)
            outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
            cl2 = ['convert', inpdf, outpng]
            x = subprocess.Popen(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
            retval2 = x.wait()
        finally:
            sto.close()
            os.unlink(hlog)  # nothing reads this scratch log, so do not litter the temp dir
        retval = retval1 or retval2
        return retval

    def getfSize(self, fpath, outpath):
        """Format a nice file size string such as ' (1.5 MB)'.

        Returns an empty string when outpath/fpath is not a regular file or
        is empty.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = ' (%1.1f MB)' % (n / 2**20)
            elif n > 2**10:
                size = ' (%1.1f KB)' % (n / 2**10)
            elif n > 0:
                size = ' (%d B)' % (int(n))
        return size

    def makeHtml(self):
        """Create an HTML page listing all the artefacts found in the output_dir.

        PDFs are compressed and shown as clickable thumbnails, every file is
        listed with its size, and the run log is appended.  The page is
        written to opts.output_html and its lines are kept in self.html.
        """
        galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlattr = """<hr/><div class="form-row">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
        galhtmlpostfix = """</div></body></html>\n"""

        # Rplots.pdf is left out of the listing
        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="toolFormTitle">Galaxy Tool "%s" run at %s</div><br/>\n' % (self.toolname, timenow()))
        fhtml = []
        if len(flist) > 0:
            html.append('<div ><table class="simple" cellpadding="3" cellspacing="3">\n')
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        s = '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600" alt="Image called %s"/></a></td></tr>\n' % (fname, thumb, fname, fname)
                        html.append(s)
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            html.append('</table></div>\n')
            if len(fhtml) > 0:
                fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
                fhtml.append('</table></div><br/>')
                html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        with open(self.tlog, 'r') as logf:
            rlog = logf.readlines()
        if len(rlog) > 1:
            html.append('<div class="infomessage">%s log follows below<hr/><pre><br/>\n' % self.opts.interpreter)
            html += rlog
            html.append('</pre></div>\n')
        html.append(galhtmlattr % (self.toolname))
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Pipe the script to the interpreter and return its exit status.

        With an output directory the child runs there and its stdout and
        stderr go to the runner log.  The HTML report is built afterwards
        when opts.make_HTML is set.
        """
        # NOTE(review): the command is a space-joined string handed to the
        # shell, so paths containing spaces or shell metacharacters will not
        # survive.  Kept as is because callers may rely on shell parsing of
        # the interpreter string.
        cmd = ' '.join(self.cl)
        payload = self.script
        if not isinstance(payload, bytes):  # the pipe wants bytes on python 3
            payload = payload.encode('utf-8')
        sto = None
        try:
            if self.opts.output_dir:
                sto = open(self.tlog, 'w')
                p = subprocess.Popen(cmd, shell=True, stdout=sto, stderr=sto, stdin=subprocess.PIPE, cwd=self.opts.output_dir)
            else:
                p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
            # communicate() writes the script, closes stdin and waits, and
            # copes with a child that exits before reading all of its input
            p.communicate(payload)
            retval = p.returncode
        finally:
            if sto is not None:
                sto.close()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
427
428
def main():
    """Parse the command line, validate it and run or package the script.

    Exits with an explanatory message on stderr when the caller is flagged
    as unauthorised or a required option is missing.  These checks are
    explicit tests rather than assert statements so that they still apply
    when python runs with -O.  Exits with the script's own status when the
    run fails.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    option_defaults = (
        ('--script_path', None),
        ('--tool_name', None),
        ('--interpreter', None),
        ('--output_dir', None),
        ('--output_html', None),
        ('--input_tab', "None"),
        ('--output_tab', "None"),
        ('--user_email', 'Unknown'),
        ('--bad_user', None),
        ('--make_Tool', None),
        ('--make_HTML', None),
        ('--help_text', None),
        ('--tool_desc', None),
        ('--new_tool', None),
        ('--tool_version', None),
    )
    for flag, default in option_defaults:
        op.add_option(flag, default=default)
    opts, args = op.parse_args()

    # sys.exit with a string prints it to stderr and exits with status 1
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')

    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine; anything else is a real failure
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
469
470
# Script entry point: run the wrapper only when executed directly, not on import.
if __name__ == "__main__":
    main()
473
474