comparison fubar-galaxytoolfactory-12000aa165b6/rgToolFactory.py @ 9:b5a672e1d4fe draft

Bump version and fix name in tool panel
author fubar
date Mon, 25 Jun 2012 02:38:50 -0400
parents
children
comparison
equal deleted inserted replaced
8:8c2ad439ad1a 9:b5a672e1d4fe
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
11 # It also serves as the wrapper for the new tool.
12 #
13 # you paste and run your script
14 # Only works for simple scripts that read one input from the history.
15 # Optionally can write one new history dataset,
16 # and optionally collect any number of outputs into links on an autogenerated HTML page.
17
18 # DO NOT install on a public or important site - please.
19
20 # installed generated tools are fine if the script is safe.
21 # They just run normally and their user cannot do anything unusually insecure
22 # but please, practice safe toolshed.
23 # Read the fucking code before you install any tool
24 # especially this one
25
26 # After you get the script working on some test data, you can
27 # optionally generate a toolshed compatible gzip file
28 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
29 # safe and largely automated installation in a production Galaxy.
30
31 # If you opt for an HTML output, you get all the script outputs arranged
32 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
33 # Ugly but really inexpensive.
34 #
35 # Patches appreciated please.
36 #
37 #
38 # long route to June 2012 product
39 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
40 # derived from an integrated script model
41 # called rgBaseScriptWrapper.py
42 # Note to the unwary:
43 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
44 # There is nothing stopping a malicious user doing whatever they choose
45 # Extremely dangerous!!
46 # Totally insecure. So, trusted users only
47 #
48 # preferred model is a developer using their throw away workstation instance - ie a private site.
49 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
50 #
51
52 import sys
53 import shutil
54 import subprocess
55 import os
56 import time
57 import tempfile
58 import optparse
59 import tarfile
60 import re
61 import shutil
62
# Module-level settings shared by the wrapper class and main().
progname = os.path.basename(sys.argv[0])  # script file name without its directory
myversion = 'V000.2 June 2012'
verbose = False
debug = False
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
68
def timenow():
    """Return the current local time as a ``dd/mm/YYYY HH:MM:SS`` string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() with no argument uses the current time.
    return time.strftime(stamp_format, time.localtime())
73
74
75 class ScriptRunner:
76 """class is a wrapper for an arbitrary script
77 """
78
79 def __init__(self,opts=None):
80 """
81 cleanup inputs, setup some outputs
82
83 """
84 if opts.output_dir: # simplify for the tool tarball
85 os.chdir(opts.output_dir)
86 self.thumbformat = 'jpg'
87 self.opts = opts
88 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
89 self.toolid = self.toolname
90 s = open(self.opts.script_path,'r').readlines()
91 self.script = ''.join(s)
92 self.indentedScript = ''.join([' %s' % x for x in s]) # for restructured text in help
93 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
94 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
95 self.xmlfile = '%s.xml' % self.toolname
96 self.sfile = '%s.%s' % (self.toolname,opts.interpreter)
97 if opts.output_dir: # may not want these complexities
98 self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
99 artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname)
100 artifact = open(artifactpath,'w')
101 artifact.write(self.script)
102 artifact.write('\n')
103 artifact.close()
104 if opts.make_Tool: # need this code and the user script for the tarball
105 localscript = open(self.sfile,'w')
106 localscript.write(self.script)
107 localscript.close()
108 self.cl = []
109 self.html = []
110 a = self.cl.append
111 a(opts.interpreter)
112 a('-') # use stdin
113 a(opts.input_tab)
114 a(opts.output_tab)
115 self.outFormats = 'tabular' # TODO make this an option at tool generation time
116 self.inputFormats = 'tabular' # TODO make this an option at tool generation time
117 self.test1Input = '%s_test1_input.xls' % self.toolname
118 self.test1Output = '%s_test1_output.xls' % self.toolname
119 self.test1HTML = '%s_test1_output.html' % self.toolname
120
121 def makeXML(self):
122 """
123 Create a Galaxy xml tool wrapper for the new script as a string to write out
124 fixme - use templating or something less fugly than this example of what we produce
125
126 <tool id="reverse" name="reverse" version="0.01">
127 <description>a tabular file</description>
128 <command interpreter="python">
129 reverse.py --script_path "$runMe" --interpreter "python"
130 --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
131 </command>
132 <inputs>
133 <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
134
135 </inputs>
136 <outputs>
137 <data format="tabular" name="tab_file" label="${job_name}"/>
138
139 </outputs>
140 <help>
141
142 **What it Does**
143
144 Reverse the columns in a tabular file
145
146 </help>
147 <configfiles>
148 <configfile name="runMe">
149
150 # reverse order of columns in a tabular file
151 import sys
152 inp = sys.argv[1]
153 outp = sys.argv[2]
154 i = open(inp,'r')
155 o = open(outp,'w')
156 for row in i:
157 rs = row.rstrip().split('\t')
158 rs.reverse()
159 o.write('\t'.join(rs))
160 o.write('\n')
161 i.close()
162 o.close()
163
164
165 </configfile>
166 </configfiles>
167 </tool>
168
169 """
170 newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="0.01">
171 %(tooldesc)s
172 %(command)s
173 <inputs>
174 %(inputs)s
175 </inputs>
176 <outputs>
177 %(outputs)s
178 </outputs>
179 <configfiles>
180 <configfile name="runMe">
181 %(script)s
182 </configfile>
183 </configfiles>
184 %(tooltests)s
185 <help>
186 %(help)s
187 </help>
188 </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
189
190 newCommand="""<command interpreter="python">
191 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
192 --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
193 </command>""" # may NOT be an input or htmlout
194 tooltestsTabOnly = """<tests><test>
195 <param name="input1" value="%s" ftype="tabular"/>
196 <param name="job_name" value="test1"/>
197 <param name="runMe" value="$runMe"/>
198 <output name="tab_file" file="%s" ftype="tabular"/>
199 </test></tests>"""
200 tooltestsHTMLOnly = """<tests><test>
201 <param name="input1" value="%s" ftype="tabular"/>
202 <param name="job_name" value="test1"/>
203 <param name="runMe" value="$runMe"/>
204 <output name="html_file" file="%s" ftype="html" lines_diff="5"/>
205 </test></tests>"""
206 tooltestsBoth = """<tests><test>
207 <param name="input1" value="%s" ftype="tabular"/>
208 <param name="job_name" value="test1"/>
209 <param name="runMe" value="$runMe"/>
210 <output name="tab_file" file="%s" ftype="tabular" />
211 <output name="html_file" file="%s" ftype="html" lines_diff="10"/>
212 </test></tests>"""
213 xdict = {}
214 if self.opts.make_HTML and self.opts.output_tab <> 'None':
215 xdict['tooltests'] = tooltestsBoth % (self.test1Input,self.test1Output,self.test1HTML)
216 elif self.opts.make_HTML:
217 xdict['tooltests'] = tooltestsHTMLOnly % (self.test1Input,self.test1HTML)
218 else:
219 xdict['tooltests'] = tooltestsTabOnly % (self.test1Input,self.test1Output)
220 xdict['script'] = self.script # configfile is least painful way to embed script to avoid external dependencies
221 if self.opts.help_text:
222 xdict['help'] = open(self.opts.help_text,'r').read()
223 else:
224 xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
225 coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
226 coda.append(self.indentedScript)
227 coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy ToolFactory.' % (self.opts.user_email,timenow()))
228 coda.append('See %s for details of that project' % (toolFactoryURL))
229 xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
230 if self.opts.tool_desc:
231 xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
232 else:
233 xdict['tooldesc'] = ''
234 xdict['command_outputs'] = ''
235 xdict['outputs'] = ''
236 if self.opts.input_tab <> 'None':
237 xdict['command_inputs'] = '--input_tab "$input1" '
238 xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
239 else:
240 xdict['command_inputs'] = '' # assume no input - eg a random data generator
241 xdict['inputs'] = ''
242 xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
243 xdict['toolname'] = self.toolname
244 xdict['toolid'] = self.toolid
245 xdict['interpreter'] = self.opts.interpreter
246 xdict['scriptname'] = self.sfile
247 if self.opts.make_HTML:
248 xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
249 xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
250 if self.opts.output_tab <> 'None':
251 xdict['command_outputs'] += ' --output_tab "$tab_file"'
252 xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
253 xdict['command'] = newCommand % xdict
254 xmls = newXML % xdict
255 xf = open(self.xmlfile,'w')
256 xf.write(xmls)
257 xf.write('\n')
258 xf.close()
259 # ready for the tarball
260
261
262 def makeTooltar(self):
263 """
264 a tool is a gz tarball with eg
265 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
266 """
267 retval = self.run()
268 if retval:
269 print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
270 sys.exit(1)
271 self.makeXML()
272 tdir = self.toolname
273 os.mkdir(tdir)
274 if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
275 testdir = os.path.join(tdir,'test-data')
276 os.mkdir(testdir) # make tests directory
277 shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input))
278 if self.opts.output_tab <> 'None':
279 shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
280 if self.opts.make_HTML:
281 shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
282 if self.opts.output_dir:
283 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
284 op = '%s.py' % self.toolname # new name
285 outpiname = os.path.join(tdir,op) # path for the tool tarball
286 pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
287 notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),]
288 notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
289 notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
290 pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
291 notes += pi
292 outpi = open(outpiname,'w')
293 outpi.write(''.join(notes))
294 outpi.write('\n')
295 outpi.close()
296 print >> sys.stdout, 'wrote %s' % outpiname
297 shutil.copyfile(self.sfile,os.path.join(tdir,self.sfile))
298 shutil.copyfile(self.xmlfile,os.path.join(tdir,self.xmlfile))
299 tarpath = "%s.gz" % self.toolname
300 tar = tarfile.open(tarpath, "w:gz")
301 tar.add(tdir,arcname=self.toolname)
302 tar.close()
303 shutil.copyfile(tarpath,self.opts.new_tool)
304 shutil.rmtree(tdir)
305 ## TODO: replace with optional direct upload to local toolshed?
306 return retval
307
308 def compressPDF(self,inpdf=None,thumbformat='png'):
309 """need absolute path to pdf
310 """
311 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
312 hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
313 sto = open(hlog,'w')
314 outpdf = '%s_compressed' % inpdf
315 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
316 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
317 retval1 = x.wait()
318 if retval1 == 0:
319 os.unlink(inpdf)
320 shutil.move(outpdf,inpdf)
321 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
322 cl2 = ['convert', inpdf, outpng]
323 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
324 retval2 = x.wait()
325 sto.close()
326 retval = retval1 or retval2
327 return retval
328
329
330 def getfSize(self,fpath,outpath):
331 """
332 format a nice file size string
333 """
334 size = ''
335 fp = os.path.join(outpath,fpath)
336 if os.path.isfile(fp):
337 n = float(os.path.getsize(fp))
338 if n > 2**20:
339 size = ' (%1.1f MB)' % (n/2**20)
340 elif n > 2**10:
341 size = ' (%1.1f KB)' % (n/2**10)
342 elif n > 0:
343 size = ' (%d B)' % (int(n))
344 return size
345
346 def makeHtml(self):
347 """ Create an HTML file content to list all the artefacts found in the output_dir
348 """
349
350 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
351 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
352 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
353 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
354 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
355 <title></title>
356 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
357 </head>
358 <body>
359 <div class="document">
360 """
361 galhtmlattr = """<hr/><b><a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory Script Wrapper</a> tool output %s run at %s</b><br/>"""
362 galhtmlpostfix = """</div></body></html>\n"""
363
364 flist = os.listdir(self.opts.output_dir)
365 flist = [x for x in flist if x <> 'Rplots.pdf']
366 flist.sort()
367 html = [galhtmlprefix % progname,]
368 html.append('<h2>Galaxy %s outputs run at %s</h2><br/>\n' % (self.toolname,timenow()))
369 fhtml = []
370 if len(flist) > 0:
371 html.append('<table cellpadding="3" cellspacing="3">\n')
372 for fname in flist:
373 dname,e = os.path.splitext(fname)
374 sfsize = self.getfSize(fname,self.opts.output_dir)
375 if e.lower() == '.pdf' : # compress and make a thumbnail
376 thumb = '%s.%s' % (dname,self.thumbformat)
377 pdff = os.path.join(self.opts.output_dir,fname)
378 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
379 if retval == 0:
380 s= '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname,thumb,fname)
381 html.append(s)
382 fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
383 else:
384 fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
385 html.append('</table>\n')
386 if len(fhtml) > 0:
387 fhtml.insert(0,'<ul>')
388 fhtml.append('</ul><br/>')
389 html += fhtml # add all non-pdf files to the end of the display
390 else:
391 html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h1>' % self.opts.interpreter)
392 html.append('<h3>%s log follows below</h3><hr/><pre><br/>\n' % self.opts.interpreter)
393 rlog = open(self.tlog,'r').readlines()
394 html += rlog
395 html.append('<br/>%s CL = %s<br/>\n' % (self.toolname,' '.join(sys.argv)))
396 html.append('</pre>\n')
397 html.append(galhtmlattr % (progname,timenow()))
398 html.append(galhtmlpostfix)
399 htmlf = file(self.opts.output_html,'w')
400 htmlf.write('\n'.join(html))
401 htmlf.write('\n')
402 htmlf.close()
403 self.html = html
404
405
406 def run(self):
407 """
408 """
409 if self.opts.output_dir:
410 sto = open(self.tlog,'w')
411 p = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
412 else:
413 p = subprocess.Popen(' '.join(self.cl),shell=True,stdin=subprocess.PIPE)
414 p.stdin.write(self.script)
415 p.stdin.close()
416 retval = p.wait()
417 if self.opts.output_dir:
418 sto.close()
419 if self.opts.make_HTML:
420 self.makeHtml()
421 return retval
422
423
def main():
    """Parse the command line, validate it and run or package the script.

    Raises AssertionError when the caller is flagged as unauthorised or when
    the tool name, interpreter or an existing script path is missing. Exits
    with the script's return code when that is non-zero.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    opts, _args = op.parse_args()
    # Explicit raises rather than assert statements so the checks (including
    # the authorisation one) still run under python -O; the exception type
    # is unchanged.
    if opts.bad_user:
        raise AssertionError('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        raise AssertionError('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        raise AssertionError('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # Test for a missing option first: os.path.isfile(None) raises TypeError.
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        raise AssertionError('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # An already existing directory is fine; anything else is a real
            # failure and should not be hidden.
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
463
464
# Run the wrapper only when this file is executed as a script.
if __name__ == '__main__':
    main()
467
468