comparison fubar-galaxytoolfactory-008551b45fdb/rgToolFactory.py @ 6:3cf6c3cf837b draft

Uploaded
author fubar
date Sat, 16 Jun 2012 23:34:42 -0400
parents
children
comparison
equal deleted inserted replaced
5:de57e1cd72ae 6:3cf6c3cf837b
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
11 # It also serves as the wrapper for the new tool.
12 #
13 # you paste and run your script
14 # Only works for simple scripts that read one input from the history.
15 # Optionally can write one new history dataset,
16 # and optionally collect any number of outputs into links on an autogenerated HTML page.
17
18 # DO NOT install on a public or important site - please.
19
20 # installed generated tools are fine if the script is safe.
21 # They just run normally and their user cannot do anything unusually insecure
22 # but please, practice safe toolshed.
23 # Read the fucking code before you install any tool
24 # especially this one
25
26 # After you get the script working on some test data, you can
27 # optionally generate a toolshed compatible gzip file
28 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
29 # safe and largely automated installation in a production Galaxy.
30
31 # If you opt for an HTML output, you get all the script outputs arranged
32 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
33 # Ugly but really inexpensive.
34 #
35 # Patches appreciated please.
36 #
37 #
38 # long route to June 2012 product
39 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
40 # derived from an integrated script model
41 # called rgBaseScriptWrapper.py
42 # Note to the unwary:
43 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
44 # There is nothing stopping a malicious user doing whatever they choose
45 # Extremely dangerous!!
46 # Totally insecure. So, trusted users only
47 #
48 # preferred model is a developer using their throw away workstation instance - ie a private site.
49 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
50 #
51
52 import sys
53 import shutil
54 import subprocess
55 import os
56 import time
57 import tempfile
58 import optparse
59 import tarfile
60 import re
61 import shutil
62
# Module-level settings shared by the wrapper.
# progname is the bare file name this script was invoked as; it is quoted in
# the generated HTML report.
progname = os.path.basename(sys.argv[0])
myversion = 'V000.2 June 2012'
# Diagnostic switches; both are off by default.
verbose = False
debug = False
67
68
def timenow():
    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    now = time.localtime()
    return time.strftime(stamp_format, now)
73
74
75 class ScriptRunner:
76 """class is a wrapper for an arbitrary script
77 """
78
79 def __init__(self,opts=None):
80 """
81 cleanup inputs, setup some outputs
82
83 """
84 if opts.output_dir: # simplify for the tool tarball
85 os.chdir(opts.output_dir)
86 self.thumbformat = 'jpg'
87 self.opts = opts
88 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
89 self.toolid = self.toolname
90 s = open(self.opts.script_path,'r').read()
91 self.script = s
92 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
93 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
94 self.xmlfile = '%s.xml' % self.toolname
95 self.sfile = '%s.%s' % (self.toolname,opts.interpreter)
96 if opts.output_dir: # may not want these complexities
97 self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
98 artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname)
99 artifact = open(artifactpath,'w')
100 artifact.write(self.script)
101 artifact.write('\n')
102 artifact.close()
103 if opts.make_Tool: # need this code and the user script for the tarball
104 localscript = open(self.sfile,'w')
105 localscript.write(self.script)
106 localscript.close()
107 self.cl = []
108 self.html = []
109 a = self.cl.append
110 a(opts.interpreter)
111 a('-') # use stdin
112 a(opts.input_tab)
113 a(opts.output_tab)
114 self.outFormats = 'tabular' # TODO make this an option at tool generation time
115 self.inputFormats = 'tabular' # TODO make this an option at tool generation time
116 self.test1Input = '%s_test1_input.xls' % self.toolname
117 self.test1Output = '%s_test1_output.xls' % self.toolname
118 self.test1HTML = '%s_test1_output.html' % self.toolname
119
120 def makeXML(self):
121 """
122 Create a Galaxy xml tool wrapper for the new script as a string to write out
123 fixme - use templating or something less fugly than this example of what we produce
124
125 <tool id="reverse" name="reverse" version="0.01">
126 <description>a tabular file</description>
127 <command interpreter="python">
128 reverse.py --script_path "$runMe" --interpreter "python"
129 --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
130 </command>
131 <inputs>
132 <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
133
134 </inputs>
135 <outputs>
136 <data format="tabular" name="tab_file" label="${job_name}"/>
137
138 </outputs>
139 <help>
140
141 **What it Does**
142
143 Reverse the columns in a tabular file
144
145 </help>
146 <configfiles>
147 <configfile name="runMe">
148
149 # reverse order of columns in a tabular file
150 import sys
151 inp = sys.argv[1]
152 outp = sys.argv[2]
153 i = open(inp,'r')
154 o = open(outp,'w')
155 for row in i:
156 rs = row.rstrip().split('\t')
157 rs.reverse()
158 o.write('\t'.join(rs))
159 o.write('\n')
160 i.close()
161 o.close()
162
163
164 </configfile>
165 </configfiles>
166 </tool>
167
168 """
169 newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="0.01">
170 %(tooldesc)s
171 %(command)s
172 <inputs>
173 %(inputs)s
174 </inputs>
175 <outputs>
176 %(outputs)s
177 </outputs>
178 <help>
179 %(help)s
180 </help>
181 <configfiles>
182 <configfile name="runMe">
183 %(script)s
184 </configfile>
185 </configfiles>
186 %(tooltests)s
187 </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
188
189 newCommand="""<command interpreter="python">
190 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
191 --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
192 </command>""" # may NOT be an input or htmlout
193 tooltestsTabOnly = """<tests><test>
194 <param name="input1" value="%s" ftype="tabular"/>
195 <param name="job_name" value="test1"/>
196 <param name="runMe" value="$runMe"/>
197 <output name="tab_file" file="%s" ftype="tabular"/>
198 </test></tests>"""
199 tooltestsHTMLOnly = """<tests><test>
200 <param name="input1" value="%s" ftype="tabular"/>
201 <param name="job_name" value="test1"/>
202 <param name="runMe" value="$runMe"/>
203 <output name="html_file" file="%s" ftype="html" lines_diff="5"/>
204 </test></tests>"""
205 tooltestsBoth = """<tests><test>
206 <param name="input1" value="%s" ftype="tabular"/>
207 <param name="job_name" value="test1"/>
208 <param name="runMe" value="$runMe"/>
209 <output name="tab_file" file="%s" ftype="tabular" />
210 <output name="html_file" file="%s" ftype="html" lines_diff="10"/>
211 </test></tests>"""
212 xdict = {}
213 if self.opts.make_HTML and self.opts.output_tab <> 'None':
214 xdict['tooltests'] = tooltestsBoth % (self.test1Input,self.test1Output,self.test1HTML)
215 elif self.opts.make_HTML:
216 xdict['tooltests'] = tooltestsHTMLOnly % (self.test1Input,self.test1HTML)
217 else:
218 xdict['tooltests'] = tooltestsTabOnly % (self.test1Input,self.test1Output)
219 xdict['script'] = self.script # configfile is least painful way to embed script to avoid external dependencies
220 if self.opts.help_text:
221 xdict['help'] = open(self.opts.help_text,'r').read()
222 else:
223 xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
224 if self.opts.tool_desc:
225 xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
226 else:
227 xdict['tooldesc'] = ''
228 xdict['command_outputs'] = ''
229 xdict['outputs'] = ''
230 if self.opts.input_tab <> 'None':
231 xdict['command_inputs'] = '--input_tab "$input1" '
232 xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
233 else:
234 xdict['command_inputs'] = '' # assume no input - eg a random data generator
235 xdict['inputs'] = ''
236 xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
237 xdict['toolname'] = self.toolname
238 xdict['toolid'] = self.toolid
239 xdict['interpreter'] = self.opts.interpreter
240 xdict['scriptname'] = self.sfile
241 if self.opts.make_HTML:
242 xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
243 xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
244 if self.opts.output_tab <> 'None':
245 xdict['command_outputs'] += ' --output_tab "$tab_file"'
246 xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
247 xdict['command'] = newCommand % xdict
248 xmls = newXML % xdict
249 xf = open(self.xmlfile,'w')
250 xf.write(xmls)
251 xf.write('\n')
252 xf.close()
253 # ready for the tarball
254
255
256 def makeTooltar(self):
257 """
258 a tool is a gz tarball with eg
259 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
260 """
261 retval = self.run()
262 if retval:
263 print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
264 sys.exit(1)
265 self.makeXML()
266 tdir = self.toolname
267 os.mkdir(tdir)
268 if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
269 testdir = os.path.join(tdir,'test-data')
270 os.mkdir(testdir) # make tests directory
271 shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input))
272 if self.opts.output_tab <> 'None':
273 shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
274 if self.opts.make_HTML:
275 shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
276 if self.opts.output_dir:
277 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
278 op = '%s.py' % self.toolname # new name
279 outpiname = os.path.join(tdir,op) # path for the tool tarball
280 pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
281 notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),]
282 notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
283 notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
284 pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
285 notes += pi
286 outpi = open(outpiname,'w')
287 outpi.write(''.join(notes))
288 outpi.write('\n')
289 outpi.close()
290 print >> sys.stdout, 'wrote %s' % outpiname
291 shutil.copyfile(self.sfile,os.path.join(tdir,self.sfile))
292 shutil.copyfile(self.xmlfile,os.path.join(tdir,self.xmlfile))
293 tarpath = "%s.gz" % self.toolname
294 tar = tarfile.open(tarpath, "w:gz")
295 tar.add(tdir,arcname=self.toolname)
296 tar.close()
297 shutil.copyfile(tarpath,self.opts.new_tool)
298 shutil.rmtree(tdir)
299 ## TODO: replace with optional direct upload to local toolshed?
300 return retval
301
302 def compressPDF(self,inpdf=None,thumbformat='png'):
303 """need absolute path to pdf
304 """
305 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
306 hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
307 sto = open(hlog,'w')
308 outpdf = '%s_compressed' % inpdf
309 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
310 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
311 retval1 = x.wait()
312 if retval1 == 0:
313 os.unlink(inpdf)
314 shutil.move(outpdf,inpdf)
315 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
316 cl2 = ['convert', inpdf, outpng]
317 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
318 retval2 = x.wait()
319 sto.close()
320 retval = retval1 or retval2
321 return retval
322
323
324 def getfSize(self,fpath,outpath):
325 """
326 format a nice file size string
327 """
328 size = ''
329 fp = os.path.join(outpath,fpath)
330 if os.path.isfile(fp):
331 n = float(os.path.getsize(fp))
332 if n > 2**20:
333 size = ' (%1.1f MB)' % (n/2**20)
334 elif n > 2**10:
335 size = ' (%1.1f KB)' % (n/2**10)
336 elif n > 0:
337 size = ' (%d B)' % (int(n))
338 return size
339
340 def makeHtml(self):
341 """ Create an HTML file content to list all the artefacts found in the output_dir
342 """
343
344 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
345 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
346 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
347 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
348 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
349 <title></title>
350 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
351 </head>
352 <body>
353 <div class="document">
354 """
355 galhtmlattr = """<hr/><b><a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory Script Wrapper</a> tool output %s run at %s</b><br/>"""
356 galhtmlpostfix = """</div></body></html>\n"""
357
358 flist = os.listdir(self.opts.output_dir)
359 flist = [x for x in flist if x <> 'Rplots.pdf']
360 flist.sort()
361 html = [galhtmlprefix % progname,]
362 html.append('<h2>Galaxy %s outputs run at %s</h2><br/>\n' % (self.toolname,timenow()))
363 fhtml = []
364 if len(flist) > 0:
365 html.append('<table cellpadding="3" cellspacing="3">\n')
366 for fname in flist:
367 dname,e = os.path.splitext(fname)
368 sfsize = self.getfSize(fname,self.opts.output_dir)
369 if e.lower() == '.pdf' : # compress and make a thumbnail
370 thumb = '%s.%s' % (dname,self.thumbformat)
371 pdff = os.path.join(self.opts.output_dir,fname)
372 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
373 if retval == 0:
374 s= '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname,thumb,fname)
375 html.append(s)
376 fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
377 else:
378 fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
379 html.append('</table>\n')
380 if len(fhtml) > 0:
381 fhtml.insert(0,'<ul>')
382 fhtml.append('</ul><br/>')
383 html += fhtml # add all non-pdf files to the end of the display
384 else:
385 html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h1>' % self.opts.interpreter)
386 html.append('<h3>%s log follows below</h3><hr/><pre><br/>\n' % self.opts.interpreter)
387 rlog = open(self.tlog,'r').readlines()
388 html += rlog
389 html.append('<br/>%s CL = %s<br/>\n' % (self.toolname,' '.join(sys.argv)))
390 html.append('</pre>\n')
391 html.append(galhtmlattr % (progname,timenow()))
392 html.append(galhtmlpostfix)
393 htmlf = file(self.opts.output_html,'w')
394 htmlf.write('\n'.join(html))
395 htmlf.write('\n')
396 htmlf.close()
397 self.html = html
398
399
400 def run(self):
401 """
402 """
403 if self.opts.output_dir:
404 sto = open(self.tlog,'w')
405 p = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
406 else:
407 p = subprocess.Popen(' '.join(self.cl),shell=True,stdin=subprocess.PIPE)
408 p.stdin.write(self.script)
409 p.stdin.close()
410 retval = p.wait()
411 if self.opts.output_dir:
412 sto.close()
413 if self.opts.make_HTML:
414 self.makeHtml()
415 return retval
416
417
def main():
    """Parse the command line, then run the script or build a tool from it.

    This is a Galaxy wrapper. It expects to be called by a special purpose
    tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Raises AssertionError when --bad_user is supplied (the caller is not
    authorised) or when a required option is missing or invalid.  Exits the
    process with the script's return code when that is non-zero, to indicate
    failure to the job runner.
    """
    def require(condition, message):
        # Explicit raise rather than an assert statement: asserts are removed
        # by "python -O", which would silently disable the authorisation
        # check.  AssertionError is kept so failures look as they did before.
        if not condition:
            raise AssertionError(message)

    op = optparse.OptionParser()
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    opts, args = op.parse_args()
    require(not opts.bad_user,
            'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    require(opts.tool_name, '## Tool Factory expects a tool name - eg --tool_name=DESeq')
    require(opts.interpreter, '## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # guard against a missing --script_path before handing it to os.path
    require(opts.script_path and os.path.isfile(opts.script_path),
            '## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine; anything else is a real failure
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
457
458
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
461
462