comparison scriptrunner.py @ 0:b6211faea403 draft

planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit ae672027942a606c1a5e302348279a5493151c11-dirty
author mvdbeek
date Fri, 08 Jul 2016 15:09:10 -0400
parents
children 813b55d27809
comparison
equal deleted inserted replaced
-1:000000000000 0:b6211faea403
1 # DockerToolFactory.py
2 # see https://github.com/mvdbeek/scriptrunner
3
4 import sys
5 import shutil
6 import subprocess
7 import os
8 import time
9 import tempfile
10 import argparse
11 import getpass
12 import tarfile
13 import re
14 import shutil
15 import math
16 import fileinput
17 from os.path import abspath
18
19
# File name (without directory) this script was invoked under.
progname = os.path.basename(sys.argv[0])
# Module-wide switches; both start out disabled.
verbose = debug = False
23
def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    now = time.localtime(time.time())
    return time.strftime(stamp_format, now)
28
# Per-character replacements applied by html_escape().
# '&', '>' and '<' become their HTML entities; '$' is prefixed with a
# backslash (presumably so the text survives later template expansion -
# TODO confirm against the consumer of the escaped script).
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return *text* with every character listed in html_escape_table replaced.

    Characters without an entry in the table are copied unchanged, so an
    empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
39
def cmd_exists(cmd):
    """Return True when the shell's ``type`` builtin can resolve *cmd*.

    ``type <cmd>`` is run through a shell; whatever it prints is captured
    and discarded, only the exit status is inspected.
    """
    probe = "type " + cmd
    status = subprocess.call(
        probe,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return status == 0
43
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the binds dictionary used to mount files via docker-py.

    host_path      -- a single path, or a list of paths, on the host.
    container_path -- mount point inside the container; when falsy the host
                      path itself is used. With a list of host paths only the
                      first entry receives container_path, every later entry
                      is mounted at its own host path.
    binds          -- existing non-empty dictionary to extend in place; a
                      fresh dictionary is created when it is None or empty.
    ro             -- value stored under the 'ro' key of every new entry.

    Returns the dictionary mapping each host path to
    {'bind': <container path>, 'ro': <ro>}.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    mounts = binds if binds else {}
    sources = host_path if isinstance(host_path, list) else [host_path]
    target = container_path
    for source in sources:
        mounts[source] = {'bind': target if target else source, 'ro': ro}
        target = False  # an explicit container path applies to the first entry only
    return mounts
62
def switch_to_docker(opts):
    """Re-run this script with the same arguments inside a docker container.

    The script file, the output directory, the input/output tables, the HTML
    report (when requested) and this wrapper itself are bind-mounted into the
    container at their host paths. The container runs
    ``python -u <sys.argv> --dockerized 1`` using ``opts.docker_image`` as the
    current user; its logs are printed once it has finished and the container
    is removed afterwards, even when running it fails.

    Side effect: ``sys.argv`` is rewritten so that the value following
    ``--output_dir`` is an absolute path.
    """
    import docker  # need local import, as container does not have docker-py
    current_user = getpass.getuser()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    if len(opts.input_tab) > 0:
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if not opts.output_tab == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working directory. The i > 0 guard keeps
    # sys.argv[-1] from being mistaken for the argument preceding argv[0].
    sys.argv = [
        output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(sys.argv)
    ]
    cmd = ['python', '-u'] + sys.argv + ['--dockerized', '1']
    container = docker_client.create_container(
        image=opts.docker_image,  # Make this configurable through job_conf
        user=current_user,  # TODO: make this configurable on the current user
        volumes=volumes,
        command=cmd
    )
    container_id = container[u'Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if isinstance(logs, bytes) and not isinstance(logs, str):
            # Python 3: the log arrives as bytes and must be decoded before joining.
            logs = logs.decode('utf-8', 'replace')
        print("".join(logs))
    finally:
        # Do not leave a stopped container behind when any step above fails.
        docker_client.remove_container(container_id)
91
class ScriptRunner:
    """Wrapper that runs an arbitrary script through an interpreter.

    The script is copied to a temporary file and into the output directory,
    a command line is assembled from the parsed options, and run() executes
    it, optionally followed by an HTML report listing everything found in
    the output directory.
    """

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs and set up some outputs.

        opts             -- parsed options; this constructor reads output_dir,
                            script_path, interpreter, input_tab, output_tab,
                            additional_parameters, output_format and
                            input_formats.
        treatbashSpecial -- when true and the interpreter is bash or sh, the
                            script is passed as a file argument instead of
                            being fed through stdin.
        image_tag        -- stored on the instance as self.image_tag.

        Side effects: changes the working directory to opts.output_dir,
        creates a temporary copy of the script (self.sfile) and, when
        opts.output_dir is set, writes 'script.<interpreter>' into it.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useGM = cmd_exists('gm')
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as source:
            s = [x.rstrip() for x in source]  # remove pesky dos line endings if needed
        self.script = '\n'.join(s)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # Wrap the descriptor returned by mkstemp so it is closed after writing;
        # self.sfile is used as the script source for Popen.
        with os.fdopen(fhandle, 'w') as tscript:
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        for input_path in opts.input_tab:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # Each entry is "name,value"; split once so the value may contain commas.
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ghostscript and render a thumbnail.

        inpdf       -- absolute path to the PDF.
        thumbformat -- extension of the thumbnail written next to the PDF.

        Ghostscript gets confused when neither $TMP nor $TEMP points at an
        existing directory, so in that case $TEMP is set to
        <output_dir>/tmp for the child processes.

        Returns 0 when both ``gs`` and ``convert`` succeed, otherwise the
        first non-zero exit status. Raises AssertionError when inpdf is not
        an existing file. The per-step log file in the output directory is
        deleted when its step succeeds.
        """
        if not os.path.isfile(inpdf):
            raise AssertionError(
                "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname))
        hlog = os.path.join(self.opts.output_dir, "compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP') or our_env.get('TEMP')
        outpdf = '%s_compressed' % inpdf
        with open(hlog, 'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(self.opts.output_dir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
                  "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
            x = subprocess.Popen(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
            retval1 = x.wait()
        if retval1 == 0:
            # Swap the compressed file in for the original and drop the log.
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)
        hlog = os.path.join(self.opts.output_dir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        # ImageMagick's convert is always used here; self.useGM is not consulted.
        cl2 = ['convert', inpdf, outpng]
        with open(hlog, 'w') as sto:
            x = subprocess.Popen(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
            retval2 = x.wait()
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Format a nice file size string for outpath/fpath.

        Returns '' when the path is not a regular file, '0 B' for an empty
        file, and otherwise the size in B, KB or MB (the latter two with one
        decimal place).
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def _pdf_gallery(self, sectionname, ourpdfs):
        """Return the HTML fragments for a roughly square table of thumbnails.

        ourpdfs is a non-empty list of (pdf file name, thumbnail file name)
        pairs; the final table row is padded with empty cells.
        """
        npdf = len(ourpdfs)
        nacross = math.sqrt(npdf)
        if int(nacross) ** 2 != npdf:
            nacross += 1
        nacross = int(nacross)
        width = min(400, 1200 // nacross)
        rows = [
            '<div class="toolFormTitle">%s images and outputs</div>' % sectionname,
            '(Click on a thumbnail image to download the corresponding original PDF image)<br/>',
            '<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>',
        ]
        ntogo = nacross  # counter for table row padding with empty cells
        for i, (fname, thumb) in enumerate(ourpdfs):
            s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                 'alt="Image called %s"/></a></td>\n') % (fname, thumb, fname, width, fname)
            if (i + 1) % nacross == 0:
                s += '</tr>\n'
                ntogo = 0
                if i < (npdf - 1):  # more to come
                    s += '<tr>'
                    ntogo = nacross
            else:
                ntogo -= 1
            rows.append(s)
        if rows[-1].strip().endswith('</tr>'):
            rows.append('</table></div>\n')
        else:
            if ntogo > 0:  # pad
                rows.append('<td>&nbsp;</td>' * ntogo)
            rows.append('</tr></table></div>\n')
        return rows

    def makeHtml(self):
        """Create an HTML file listing all the artifacts found in the output_dir.

        Every PDF is compressed and thumbnailed via compressPDF(). Log files
        (names ending in '.log') determine the report sections: PDFs whose
        name shares the part before the first '_' with a log file are shown
        in that log's section, the remaining ones with the runner log, which
        is always handled last. The page is written to opts.output_html and
        its fragments are kept in self.html.
        """
        galhtmlprefix = (
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
            '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n'
            '<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'
            '<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />\n'
            '<title></title>\n'
            '<link rel="stylesheet" href="/static/style/base.css" type="text/css" />\n'
            '</head>\n'
            '<body>\n'
            '<div class="toolFormBody">\n'
        )
        galhtmlpostfix = """</div></body></html>\n"""

        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = [
            galhtmlprefix % progname,
            '<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()),
        ]
        fhtml = []
        if len(flist) > 0:
            runner_log = abspath(self.tlog)
            # log file names determine sections
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != runner_log]
            logfiles.append(runner_log)  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        # No usable thumbnail: the PDF stands in for its own image.
                        pdflist.append((fname, fname))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == runner_log:
                    # The runner log collects every PDF not claimed by another log.
                    sectionname = 'All tool run'
                    if len(logfiles) > 1:
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                if len(ourpdfs) > 0:
                    html += self._pdf_gallery(sectionname, ourpdfs)
                with open(logfname, 'r') as logfile:
                    logtext = [x for x in logfile if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def _feed_script(self, process):
        """Write the script to the child's stdin, close it, return the exit status."""
        process.stdin.write(self.script)
        process.stdin.close()
        return process.wait()

    def run(self):
        """Run the script and return the interpreter's exit status.

        bash/sh scripts are delegated to runBash() when treatbashSpecial is
        set. Otherwise the script is fed through the interpreter's stdin, so
        scripts must be small enough not to fill the pipe! With an output
        directory, stdout goes to the runner log and stderr to the error log;
        a non-empty error log is echoed to this process's stderr when the
        exit status is non-zero. The HTML report is written when
        opts.make_HTML is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                print("commandline is %s" % (self.cl))
                print("environment is %s" % (os.environ))
                # universal_newlines makes stdin a text stream on Python 3 so the
                # script can be written as str.
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir,
                                     universal_newlines=True)
                retval = self._feed_script(p)
            with open(self.elog, 'r') as errlog:
                err = errlog.readlines()
            if retval != 0 and err:  # problem
                # same problem, need to capture docker stdin/stdout
                sys.stderr.write('%s\n' % (err,))
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                                 universal_newlines=True)
            retval = self._feed_script(p)
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run the command line without feeding stdin and return its exit status.

        '-' cannot be used for bash, so the command line built by __init__
        names self.sfile instead. With an output directory, stdout and stderr
        both go to the runner log. The HTML report is written when
        opts.make_HTML is set.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto,
                                     cwd=self.opts.output_dir)
                retval = p.wait()
        else:
            p = subprocess.Popen(self.cl, shell=False)
            retval = p.wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
378
def main():
    """Parse the command line and run the script, directly or through docker.

    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Unless --dockerized is non-zero, the call is handed to
    switch_to_docker(), which re-invokes this script inside a container with
    ``--dockerized 1``. Inside the container the script is executed by
    ScriptRunner and a non-zero exit status is passed on through sys.exit().

    Raises AssertionError when --bad_user is given or --script_path does not
    name an existing file.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # An empty list (not the string 'None') so that an omitted option means
    # "no input files" to the code iterating over it.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    # Parsed as int so that the comparison with 0 below is meaningful.
    a('--dockerized', default=0, type=int)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    # Raised explicitly (rather than via assert) so the checks survive python -O.
    if opts.bad_user:
        raise AssertionError(
            'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini'
            % (opts.bad_user, opts.bad_user))
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        raise AssertionError('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # Best effort: the directory usually exists already.
            pass
    if opts.dockerized == 0:
        switch_to_docker(opts)
        return
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        # Always remove the temporary script copy, even when the run fails.
        os.unlink(r.sfile)
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()