Mercurial > repos > mvdbeek > docker_scriptrunner
comparison scriptrunner.py @ 0:b6211faea403 draft
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit ae672027942a606c1a5e302348279a5493151c11-dirty
author | mvdbeek |
---|---|
date | Fri, 08 Jul 2016 15:09:10 -0400 |
parents | |
children | 813b55d27809 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b6211faea403 |
---|---|
1 # DockerToolFactory.py | |
2 # see https://github.com/mvdbeek/scriptrunner | |
3 | |
4 import sys | |
5 import shutil | |
6 import subprocess | |
7 import os | |
8 import time | |
9 import tempfile | |
10 import argparse | |
11 import getpass | |
12 import tarfile | |
13 import re | |
14 import shutil | |
15 import math | |
16 import fileinput | |
17 from os.path import abspath | |
18 | |
19 | |
# File name of the running script; interpolated into the "generator" meta tag
# of the HTML report.
progname = os.path.basename(sys.argv[0])
# Module-level switches; both start out disabled.
verbose = False
debug = False
23 | |
def timenow():
    """Return the current local time formatted as 'DD/MM/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
28 | |
# Characters that must not appear verbatim when a script is embedded in
# generated markup. The three HTML metacharacters map to their entities; "$"
# is backslash-escaped (value is the two characters backslash + dollar).
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return *text* with every character listed in html_escape_table replaced.

    Characters that are not keys of the table are copied through unchanged,
    so an empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
39 | |
def cmd_exists(cmd):
    """Return True when the shell builtin ``type`` can resolve *cmd*.

    The probe's stdout and stderr are redirected to pipes so that nothing is
    echoed to the caller's terminal; only the exit status is inspected.
    """
    exit_status = subprocess.call(
        "type " + cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return exit_status == 0
43 | |
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the binds dictionary used to mount files with docker-py.

    host_path      -- a single host path, or a list of host paths
    container_path -- mount point inside the container; when falsy the host
                      path itself is used. For a list it only applies to the
                      first entry, every later entry is mounted at its own
                      host path.
    binds          -- existing mapping to extend; a falsy value (None or an
                      empty dict) starts a fresh dictionary
    ro             -- value stored under the 'ro' key of every added entry

    Returns the mapping {host_path: {'bind': container_path, 'ro': ro}}.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    mounts = binds if binds else {}
    sources = host_path if isinstance(host_path, list) else [host_path]
    for position, source in enumerate(sources):
        use_explicit_target = position == 0 and container_path
        target = container_path if use_explicit_target else source
        mounts[source] = {'bind': target, 'ro': ro}
    return mounts
62 | |
def switch_to_docker(opts):
    """Re-run this script, with the same arguments, inside a docker container.

    The script file, the output directory, any input/output tables, the HTML
    output (when opts.make_HTML is set) and this program itself are
    bind-mounted at identical paths inside the container. The command line is
    replayed with ``--dockerized 1`` appended and ``--output_dir`` replaced by
    its absolute path. Note that sys.argv is rewritten in place.

    The container's log output is printed once it has finished, and the
    container is removed even when starting or waiting on it fails.

    Returns the exit status reported by ``docker_client.wait``.
    """
    import docker  # need local import, as container does not have docker-py
    current_user = getpass.getuser()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    if len(opts.input_tab) > 0:
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if opts.output_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working directory. The i > 0 guard stops
    # argv[0] from being compared against the *last* argument (index -1).
    sys.argv = [
        output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(sys.argv)
    ]
    cmd = ['python', '-u'] + sys.argv + ['--dockerized', '1']
    container = docker_client.create_container(
        image=opts.docker_image,  # Make this configurable through job_conf
        user=current_user,  # TODO: make this configurable on the current user
        volumes=volumes,
        command=cmd
    )
    container_id = container[u'Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        # On Python 3 the log comes back as bytes; decode it so it prints as
        # text. On Python 2 bytes is str and the value is printed unchanged.
        if isinstance(logs, bytes) and not isinstance(logs, str):
            logs = logs.decode('utf-8', 'replace')
        print(logs)
    finally:
        docker_client.remove_container(container_id)
    # NOTE(review): newer docker clients return a dict from wait(); unwrap it
    # so callers always see a plain exit status - confirm against the
    # installed docker-py version.
    if isinstance(status, dict):
        status = status.get('StatusCode', 0)
    return status
91 | |
class ScriptRunner(object):
    """Wrapper that stages and runs an arbitrary script.

    The script named by opts.script_path is copied to a temporary file, run
    under opts.interpreter with the input/output tables and any additional
    parameters as arguments, and - when opts.make_HTML is set - every file
    found in opts.output_dir afterwards is indexed in an HTML report.
    """

    # Opening boilerplate of the HTML report; the single %s receives progname.
    _HTML_PREFIX = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
    _HTML_POSTFIX = """</div></body></html>\n"""

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, stage the script and build the command line.

        opts             -- parsed command-line namespace; script_path,
                            interpreter, output_dir, input_tab, output_tab,
                            additional_parameters, output_format and
                            input_formats are read here
        treatbashSpecial -- when true, bash/sh receive the staged script file
                            as an argument instead of reading it from stdin
        image_tag        -- stored on the instance as-is

        Side effects: changes the working directory to opts.output_dir,
        creates a temporary copy of the script (self.sfile) and, when
        opts.output_dir is set, a second copy inside that directory.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useGM = cmd_exists('gm')
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as source:
            # rstrip() also removes pesky dos line endings if present
            lines = [line.rstrip() for line in source]
        self.script = '\n'.join(lines)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # Write through the descriptor mkstemp returned so it gets closed
        # instead of leaking; self.sfile is the script source for Popen.
        with os.fdopen(fhandle, 'w') as tscript:
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in lines])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in lines])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        for input_path in opts.input_tab:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # "name,value": split on the first comma only so that the value
            # itself may contain commas.
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ``gs`` and render a thumbnail of it.

        inpdf       -- absolute path to the pdf
        thumbformat -- file extension handed to ``convert`` for the thumbnail

        GS gets confused if neither $TMP nor $TEMP points at an existing
        directory, so in that case a ``tmp`` directory inside the output
        directory is used via $TEMP in the child environment.

        Each step logs to a text file in the output directory which is
        deleted again when the step succeeds. Returns 0 when both steps
        succeed, otherwise the first non-zero exit status. Raises
        AssertionError when *inpdf* is not an existing file.
        """
        assert inpdf and os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname)
        outdir = self.opts.output_dir
        hlog = os.path.join(outdir, "compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        outpdf = '%s_compressed' % inpdf
        with open(hlog, 'a') as sto:
            our_tmp = our_env.get('TMP', None)
            if not our_tmp:
                our_tmp = our_env.get('TEMP', None)
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(outdir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            sto.flush()  # keep our warnings ahead of the child's output
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
                  "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
            retval1 = subprocess.call(cl, stdout=sto, stderr=sto, cwd=outdir, env=our_env)
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)
        hlog = os.path.join(outdir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        cl2 = ['convert', inpdf, outpng]  # assume imagemagick
        with open(hlog, 'w') as sto:
            retval2 = subprocess.call(cl2, stdout=sto, stderr=sto, cwd=outdir, env=our_env)
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Format a nice file size string for *fpath* inside *outpath*.

        Returns '' when the joined path is not a regular file, otherwise the
        size in B, KB (above 2**10 bytes) or MB (above 2**20 bytes).
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def _thumbnail_table(self, sectionname, ourpdfs):
        """Return the HTML fragments of a thumbnail grid for one section.

        ourpdfs is a non-empty list of (pdf name, thumbnail name) pairs; they
        are laid out in the smallest square grid that holds all of them.
        """
        npdf = len(ourpdfs)
        nacross = int(math.sqrt(npdf))
        if nacross ** 2 != npdf:
            nacross += 1
        width = min(400, 1200 // nacross)
        out = [
            '<div class="toolFormTitle">%s images and outputs</div>' % sectionname,
            '(Click on a thumbnail image to download the corresponding original PDF image)<br/>',
            '<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>',
        ]
        ntogo = nacross  # counter for table row padding with empty cells
        for i, (fname, thumb) in enumerate(ourpdfs):
            s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                 '   alt="Image called %s"/></a></td>\n') % (fname, thumb, fname, width, fname)
            if (i + 1) % nacross == 0:
                s += '</tr>\n'
                ntogo = 0
                if i < (npdf - 1):  # more to come
                    s += '<tr>'
                    ntogo = nacross
            else:
                ntogo -= 1
            out.append(s)
        if out[-1].strip().endswith('</tr>'):
            out.append('</table></div>\n')
        else:
            if ntogo > 0:  # pad the last row
                out.append('<td>&nbsp;</td>' * ntogo)
            out.append('</tr></table></div>\n')
        return out

    def makeHtml(self):
        """Create an HTML file listing all the artifacts found in the output_dir.

        Every PDF is compressed and thumbnailed first. Each ``*.log`` file
        opens a section: PDFs whose name (up to the first underscore) matches
        the log's name are shown as thumbnails above that log's text. The
        runner log comes last and collects all PDFs not claimed by an earlier
        section. A table of every output file with its size closes the page.
        The page is written to opts.output_html and kept in self.html.
        """
        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = []
        html.append(self._HTML_PREFIX % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            runner_log = abspath(self.tlog)
            # log file names determine sections
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != runner_log]
            logfiles.append(runner_log)  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        pdflist.append((fname, fname))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == runner_log:
                    sectionname = 'All tool run'
                    if len(logfiles) > 1:
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    # remove the claimed PDFs from the pool
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]
                if ourpdfs:
                    html.extend(self._thumbnail_table(sectionname, ourpdfs))
                with open(logfname, 'r') as log:
                    logtext = [x for x in log if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(self._HTML_POSTFIX)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def _feed_script(self, **popen_kwargs):
        """Start self.cl, pipe self.script to its stdin and return the exit status."""
        # universal_newlines gives a text-mode stdin on Python 3 so that the
        # str script can be written; on Python 2 stdin is unaffected.
        p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                             universal_newlines=True, **popen_kwargs)
        p.stdin.write(self.script)
        p.stdin.close()
        return p.wait()

    def run(self):
        """Run the script and return the interpreter's exit status.

        bash/sh are delegated to runBash() when treatbashSpecial is set; every
        other interpreter receives the script on stdin, so scripts must be
        small enough not to fill the pipe! With an output directory, stdout
        goes to the runner log and stderr to the error log; a non-empty error
        log of a failed run is echoed to sys.stderr. The HTML report is
        generated afterwards when opts.make_HTML is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                print("commandline is %s" % (self.cl))
                print("environment is %s" % (os.environ))
                retval = self._feed_script(stdout=sto, stderr=ste, cwd=self.opts.output_dir)
            with open(self.elog, 'r') as errlog:
                err = errlog.readlines()
            if retval != 0 and err:  # problem
                sys.stderr.write('%s\n' % err)  # same problem, need to capture docker stdin/stdout
        else:
            retval = self._feed_script()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run the staged script file and return the exit status.

        Cannot use - (stdin) for bash, so self.cl names self.sfile instead.
        With an output directory, stdout and stderr both go to the runner
        log. The HTML report is generated afterwards when opts.make_HTML is
        set.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                retval = subprocess.call(self.cl, shell=False, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
        else:
            retval = subprocess.call(self.cl, shell=False)
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
377 | |
378 | |
def main():
    """Command-line entry point of the Galaxy script wrapper.

    Expected to be called from a special purpose tool.xml, for example with
    ``--script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"``.

    Unless ``--dockerized`` is passed, the whole invocation is handed to
    switch_to_docker(), which re-runs it inside a container. Otherwise the
    script is run directly through ScriptRunner. A non-zero status of the run
    ends the process with that status so the job runner sees the failure;
    invalid arguments end it with a message on stderr and status 1.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # An empty list rather than the string 'None': the value is iterated
    # path by path, and a string default would be walked character by character.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    a('--dockerized', default=0)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    # Explicit checks instead of assert, which is stripped under "python -O".
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # An already existing directory is fine; anything else is not.
            if not os.path.isdir(opts.output_dir):
                raise
    if opts.dockerized == 0:
        # A value given on the command line arrives as a string, so only the
        # untouched default (int 0) selects the docker route.
        retcode = switch_to_docker(opts)
        if retcode:  # a None result is treated as success
            sys.exit(retcode)
        return
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # remove the temporary script copy
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()