comparison venv/lib/python2.7/site-packages/bioblend/galaxy/objects/wrappers.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
1 # pylint: disable=W0622,E1101
2
3 """
4 A basic object-oriented interface for Galaxy entities.
5 """
6
7 import abc
8 import collections
9 import json
10
11 from six.moves import http_client
12 import six
13
14 import bioblend
15
16 __all__ = [
17 'Wrapper',
18 'Step',
19 'Workflow',
20 'ContentInfo',
21 'LibraryContentInfo',
22 'HistoryContentInfo',
23 'DatasetContainer',
24 'History',
25 'Library',
26 'Folder',
27 'Dataset',
28 'HistoryDatasetAssociation',
29 'LibraryDatasetDatasetAssociation',
30 'LibraryDataset',
31 'Tool',
32 'Job',
33 'Preview',
34 'LibraryPreview',
35 'HistoryPreview',
36 'WorkflowPreview',
37 ]
38
39
40 @six.add_metaclass(abc.ABCMeta)
41 class Wrapper(object):
42 """
43 Abstract base class for Galaxy entity wrappers.
44
45 Wrapper instances wrap deserialized JSON dictionaries, such as those
46 obtained from the Galaxy web API, converting key-based access into
47 attribute-based access (e.g., ``library['name'] -> library.name``).
48
49 Dict keys that are converted to attributes are listed in the
50 ``BASE_ATTRS`` class variable: this is the 'stable' interface.
51 Note that the wrapped dictionary is accessible via the ``wrapped``
52 attribute.
53 """
54 BASE_ATTRS = ('id', 'name')
55
56 @abc.abstractmethod
57 def __init__(self, wrapped, parent=None, gi=None):
58 """
59 :type wrapped: dict
60 :param wrapped: JSON-serializable dictionary
61
62 :type parent: :class:`Wrapper`
63 :param parent: the parent of this wrapper
64
65 :type gi: :class:`GalaxyInstance`
66 :param gi: the GalaxyInstance through which we can access this wrapper
67 """
68 if not isinstance(wrapped, collections.Mapping):
69 raise TypeError('wrapped object must be a mapping type')
70 # loads(dumps(x)) is a bit faster than deepcopy and allows type checks
71 try:
72 dumped = json.dumps(wrapped)
73 except (TypeError, ValueError):
74 raise ValueError('wrapped object must be JSON-serializable')
75 object.__setattr__(self, 'wrapped', json.loads(dumped))
76 for k in self.BASE_ATTRS:
77 object.__setattr__(self, k, self.wrapped.get(k))
78 object.__setattr__(self, '_cached_parent', parent)
79 object.__setattr__(self, 'is_modified', False)
80 object.__setattr__(self, 'gi', gi)
81
82 @abc.abstractproperty
83 def gi_module(self):
84 """
85 The GalaxyInstance module that deals with objects of this type.
86 """
87 pass
88
89 @property
90 def parent(self):
91 """
92 The parent of this wrapper.
93 """
94 return self._cached_parent
95
96 @property
97 def is_mapped(self):
98 """
99 ``True`` if this wrapper is mapped to an actual Galaxy entity.
100 """
101 return self.id is not None
102
103 def unmap(self):
104 """
105 Disconnect this wrapper from Galaxy.
106 """
107 object.__setattr__(self, 'id', None)
108
109 def clone(self):
110 """
111 Return an independent copy of this wrapper.
112 """
113 return self.__class__(self.wrapped)
114
115 def touch(self):
116 """
117 Mark this wrapper as having been modified since its creation.
118 """
119 object.__setattr__(self, 'is_modified', True)
120 if self.parent:
121 self.parent.touch()
122
123 def to_json(self):
124 """
125 Return a JSON dump of this wrapper.
126 """
127 return json.dumps(self.wrapped)
128
129 @classmethod
130 def from_json(cls, jdef):
131 """
132 Build a new wrapper from a JSON dump.
133 """
134 return cls(json.loads(jdef))
135
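# Round-trip sketch (assumes `hist` is a mapped History wrapper):
# to_json() and from_json() rebuild a detached copy that wraps an equal
# dict but has no GalaxyInstance attached (copy.gi is None):
#
#   copy = History.from_json(hist.to_json())
#   assert copy.wrapped == hist.wrapped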
136 # FIXME: things like self.x[0] = 'y' do NOT call self.__setattr__
137 def __setattr__(self, name, value):
138 if name not in self.wrapped:
139 raise AttributeError("can't set attribute")
140 else:
141 self.wrapped[name] = value
142 object.__setattr__(self, name, value)
143 self.touch()
144
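# Mutation sketch (assumes `hist` is a mapped History wrapper): assigning
# to an attribute whose name is a key of the wrapped dict updates both
# views and flags the wrapper (and, via touch(), its ancestors) as
# modified:
#
#   hist.name = 'new name'
#   assert hist.wrapped['name'] == 'new name' and hist.is_modified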
145 def __repr__(self):
146 return "%s(%r)" % (self.__class__.__name__, self.wrapped)
147
148
149 class Step(Wrapper):
150 """
151 Abstract base class for workflow steps.
152
153 Steps are the main building blocks of a Galaxy workflow. A step
154 can refer to either an input dataset (type ``data_input``) or a
155 computational tool (type ``tool``).
156 """
157 BASE_ATTRS = Wrapper.BASE_ATTRS + (
158 'input_steps', 'tool_id', 'tool_inputs', 'tool_version', 'type'
159 )
160
161 def __init__(self, step_dict, parent):
162 super(Step, self).__init__(step_dict, parent=parent, gi=parent.gi)
163 if self.type == 'tool' and self.tool_inputs:
164 for k, v in six.iteritems(self.tool_inputs):
165 self.tool_inputs[k] = json.loads(v)
166
167 @property
168 def gi_module(self):
169 return self.gi.workflows
170
171
172 class Workflow(Wrapper):
173 """
174 Workflows represent ordered sequences of computations on Galaxy.
175
176 A workflow defines a sequence of steps that produce one or more
177 results from one or more input datasets.
178 """
179 BASE_ATTRS = Wrapper.BASE_ATTRS + (
180 'deleted', 'inputs', 'published', 'steps', 'tags'
181 )
182 POLLING_INTERVAL = 10 # for output state monitoring
183
184 def __init__(self, wf_dict, gi=None):
185 super(Workflow, self).__init__(wf_dict, gi=gi)
186 missing_ids = []
187 if gi:
188 tools_list_by_id = [t.id for t in gi.tools.get_previews()]
189 else:
190 tools_list_by_id = []
191 for k, v in six.iteritems(self.steps):
192 # convert step ids to str for consistency with outer keys
193 v['id'] = str(v['id'])
194 for i in six.itervalues(v['input_steps']):
195 i['source_step'] = str(i['source_step'])
196 step = self._build_step(v, self)
197 self.steps[k] = step
198 if step.type == 'tool':
199 if not step.tool_inputs or step.tool_id not in tools_list_by_id:
200 missing_ids.append(k)
201 input_labels_to_ids = {}
202 for id_, d in six.iteritems(self.inputs):
203 input_labels_to_ids.setdefault(d['label'], set()).add(id_)
204 tool_labels_to_ids = {}
205 for s in six.itervalues(self.steps):
206 if s.type == 'tool':
207 tool_labels_to_ids.setdefault(s.tool_id, set()).add(s.id)
208 object.__setattr__(self, 'input_labels_to_ids', input_labels_to_ids)
209 object.__setattr__(self, 'tool_labels_to_ids', tool_labels_to_ids)
210 dag, inv_dag = self._get_dag()
211 heads, tails = set(dag), set(inv_dag)
212 object.__setattr__(self, 'dag', dag)
213 object.__setattr__(self, 'inv_dag', inv_dag)
214 object.__setattr__(self, 'source_ids', heads - tails)
215 assert self.data_input_ids == set(self.inputs)
216 object.__setattr__(self, 'sink_ids', tails - heads)
217 object.__setattr__(self, 'missing_ids', missing_ids)
218
219 @property
220 def gi_module(self):
221 return self.gi.workflows
222
223 def _get_dag(self):
224 """
225 Return the workflow's DAG.
226
227 For convenience, this method computes a 'direct' (step =>
228 successors) and an 'inverse' (step => predecessors)
229 representation of the same DAG.
230
231 For instance, a workflow with a single tool *c*, two inputs
232 *a, b* and three outputs *d, e, f* is represented by (direct)::
233
234 {'a': {'c'}, 'b': {'c'}, 'c': {'d', 'e', 'f'}}
235
236 and by (inverse)::
237
238 {'c': {'a', 'b'}, 'd': {'c'}, 'e': {'c'}, 'f': {'c'}}
239 """
240 dag, inv_dag = {}, {}
241 for s in six.itervalues(self.steps):
242 for i in six.itervalues(s.input_steps):
243 head, tail = i['source_step'], s.id
244 dag.setdefault(head, set()).add(tail)
245 inv_dag.setdefault(tail, set()).add(head)
246 return dag, inv_dag
247
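# DAG sketch for a two-step workflow in which tool step '1' reads from
# data input step '0':
#
#   wf.dag        == {'0': {'1'}}
#   wf.inv_dag    == {'1': {'0'}}
#   wf.source_ids == {'0'}   # steps with no predecessors
#   wf.sink_ids   == {'1'}   # steps with no successors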
248 def sorted_step_ids(self):
249 """
250 Return a topological sort of the workflow's DAG.
251 """
252 ids = []
253 source_ids = self.source_ids.copy()
254 inv_dag = dict((k, v.copy()) for k, v in six.iteritems(self.inv_dag))
255 while source_ids:
256 head = source_ids.pop()
257 ids.append(head)
258 for tail in self.dag.get(head, []):
259 incoming = inv_dag[tail]
260 incoming.remove(head)
261 if not incoming:
262 source_ids.add(tail)
263 return ids
264
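# Continuing the two-step sketch above: every step id appears after all
# of its predecessors (Kahn's algorithm):
#
#   wf.sorted_step_ids()  # -> ['0', '1']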
265 @staticmethod
266 def _build_step(step_dict, parent):
267 """
268 Return a Step object for the given parameters.
269 """
270 try:
271 stype = step_dict['type']
272 except KeyError:
273 raise ValueError('not a step dict')
274 if stype not in set(['data_input', 'tool']):
275 raise ValueError('unknown step type: %r' % (stype,))
276 return Step(step_dict, parent)
277
278 @property
279 def data_input_ids(self):
280 """
281 Return the ids of the data input steps for this workflow.
282 """
283 return set(id_ for id_, s in six.iteritems(self.steps)
284 if s.type == 'data_input')
285
286 @property
287 def tool_ids(self):
288 """
289 Return the ids of the tool steps for this workflow.
290 """
291 return set(id_ for id_, s in six.iteritems(self.steps)
292 if s.type == 'tool')
293
294 @property
295 def input_labels(self):
296 """
297 Return the labels of this workflow's input steps.
298 """
299 return set(self.input_labels_to_ids)
300
301 @property
302 def is_runnable(self):
303 """
304 Return True if the workflow can be run on Galaxy.
305
306 A workflow is considered runnable on a Galaxy instance if all
307 of the tools it uses are installed in that instance.
308 """
309 return not self.missing_ids
310
311 def convert_input_map(self, input_map):
312 """
313 Convert ``input_map`` to the format required by the Galaxy web API.
314
315 :type input_map: dict
316 :param input_map: a mapping from input labels to datasets
317
318 :rtype: dict
319 :return: a mapping from input slot ids to dataset ids in the
320 format required by the Galaxy web API.
321 """
322 m = {}
323 for label, slot_ids in six.iteritems(self.input_labels_to_ids):
324 datasets = input_map.get(label, [])
325 if not isinstance(datasets, collections.Iterable):
326 datasets = [datasets]
327 if len(datasets) < len(slot_ids):
328 raise RuntimeError('not enough datasets for "%s"' % label)
329 for id_, ds in zip(slot_ids, datasets):
330 m[id_] = {'id': ds.id, 'src': ds.SRC}
331 return m
332
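# Conversion sketch (assumes a single input slot labelled 'reads' and a
# HistoryDatasetAssociation `hda`; slot ids are assigned by Galaxy):
#
#   wf.convert_input_map({'reads': hda})
#   # -> {<slot_id>: {'id': hda.id, 'src': 'hda'}}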
333 def preview(self):
334 getf = self.gi.workflows.get_previews
335 try:
336 p = [_ for _ in getf(published=True) if _.id == self.id][0]
337 except IndexError:
338 raise ValueError('no object for id %s' % self.id)
339 return p
340
341 def run(self, input_map=None, history='', params=None, import_inputs=False,
342 replacement_params=None, wait=False,
343 polling_interval=POLLING_INTERVAL, break_on_error=True):
344 """
345 Run the workflow in the current Galaxy instance.
346
347 :type input_map: dict
348 :param input_map: a mapping from workflow input labels to
349 datasets, e.g.: ``dict(zip(workflow.input_labels,
350 library.get_datasets()))``
351
352 :type history: :class:`History` or str
353 :param history: either a valid history object (results will be
354 stored there) or a string (a new history will be created with
355 the given name).
356
357 :type params: :class:`~collections.Mapping`
358 :param params: parameter settings for workflow steps (see below)
359
360 :type import_inputs: bool
361 :param import_inputs: If ``True``, workflow inputs will be imported into
362 the history; if ``False``, only workflow outputs will be visible in
363 the history.
364
365 :type replacement_params: :class:`~collections.Mapping`
366 :param replacement_params: pattern-based replacements for
367 post-job actions (see the docs for
368 :meth:`~bioblend.galaxy.workflows.WorkflowClient.run_workflow`)
369
370 :type wait: bool
371 :param wait: whether to wait while the returned datasets are
372 in a pending state
373
374 :type polling_interval: float
375 :param polling_interval: polling interval in seconds
376
377 :type break_on_error: bool
378 :param break_on_error: whether to break as soon as at least one
379 of the returned datasets is in the 'error' state
380
381 :rtype: tuple
382 :return: list of output datasets, output history
383
384 The ``params`` dict should be structured as follows::
385
386 PARAMS = {STEP_ID: PARAM_DICT, ...}
387 PARAM_DICT = {NAME: VALUE, ...}
388
389 For backwards compatibility, the following (deprecated) format is
390 also supported::
391
392 PARAMS = {TOOL_ID: PARAM_DICT, ...}
393
394 in which case PARAM_DICT affects all steps with the given tool id.
395 If both by-tool-id and by-step-id specifications are used, the
396 latter takes precedence.
397
398 Finally (again, for backwards compatibility), PARAM_DICT can also
399 be specified as::
400
401 PARAM_DICT = {'param': NAME, 'value': VALUE}
402
403 Note that this format allows only one parameter to be set per step.
404
405 Example: set 'a' to 1 for the third workflow step::
406
407 params = {workflow.steps[2].id: {'a': 1}}
408
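A typical invocation (sketch; assumes the workflow is mapped and
``library`` holds one dataset per workflow input)::

input_map = dict(zip(workflow.input_labels, library.get_datasets()))
outputs, out_hist = workflow.run(input_map, 'output history', wait=True)
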
409 .. warning::
410
411 This is a blocking operation that can take a very long time. If
412 ``wait`` is set to ``False``, the method will return as soon as the
413 workflow has been *scheduled*, otherwise it will wait until the
414 workflow has been *run*. With a large number of steps, however, the
415 delay may not be negligible even in the former case (e.g. minutes for
416 100 steps).
417 """
418 if not self.is_mapped:
419 raise RuntimeError('workflow is not mapped to a Galaxy object')
420 if not self.is_runnable:
421 raise RuntimeError('workflow has missing tools: %s' % ', '.join(
422 '%s[%s]' % (self.steps[_].tool_id, _)
423 for _ in self.missing_ids))
424 kwargs = {
425 'dataset_map': self.convert_input_map(input_map or {}),
426 'params': params,
427 'import_inputs_to_history': import_inputs,
428 'replacement_params': replacement_params,
429 }
430 if isinstance(history, History):
431 try:
432 kwargs['history_id'] = history.id
433 except AttributeError:
434 raise RuntimeError('history does not have an id')
435 elif isinstance(history, six.string_types):
436 kwargs['history_name'] = history
437 else:
438 raise TypeError(
439 'history must be either a history wrapper or a string')
440 res = self.gi.gi.workflows.run_workflow(self.id, **kwargs)
441 # res structure: {'history': HIST_ID, 'outputs': [DS_ID, DS_ID, ...]}
442 out_hist = self.gi.histories.get(res['history'])
443 assert set(res['outputs']).issubset(out_hist.dataset_ids)
444 outputs = [out_hist.get_dataset(_) for _ in res['outputs']]
445
446 if wait:
447 self.gi._wait_datasets(outputs, polling_interval=polling_interval,
448 break_on_error=break_on_error)
449 return outputs, out_hist
450
451 def export(self):
452 """
453 Export a re-importable representation of the workflow.
454
455 :rtype: dict
456 :return: a JSON-serializable dump of the workflow
457 """
458 return self.gi.gi.workflows.export_workflow_json(self.id)
459
460 def delete(self):
461 """
462 Delete this workflow.
463
464 .. warning::
465 Deleting a workflow is irreversible - all of the data from
466 the workflow will be permanently deleted.
467 """
468 self.gi.workflows.delete(id_=self.id)
469 self.unmap()
470
471
472 @six.add_metaclass(abc.ABCMeta)
473 class Dataset(Wrapper):
474 """
475 Abstract base class for Galaxy datasets.
476 """
477 BASE_ATTRS = Wrapper.BASE_ATTRS + (
478 'data_type', 'file_name', 'file_size', 'state', 'deleted', 'file_ext'
479 )
480 POLLING_INTERVAL = 1 # for state monitoring
481
482 @abc.abstractmethod
483 def __init__(self, ds_dict, container, gi=None):
484 super(Dataset, self).__init__(ds_dict, gi=gi)
485 object.__setattr__(self, 'container', container)
486
487 @property
488 def container_id(self):
489 """
490 Deprecated property.
491
492 Id of the dataset container. Use :attr:`.container.id` instead.
493 """
494 return self.container.id
495
496 @abc.abstractproperty
497 def _stream_url(self):
498 """
499 Return the URL to stream this dataset.
500 """
501 pass
502
503 def get_stream(self, chunk_size=bioblend.CHUNK_SIZE):
504 """
505 Open dataset for reading and return an iterator over its contents.
506
507 :type chunk_size: int
508 :param chunk_size: number of bytes to read at a time
509
510 .. warning::
511
512 Due to a change in the Galaxy API endpoint, this method does
513 not work on :class:`LibraryDataset` instances with Galaxy
514 ``release_2014.06.02``. Methods that delegate work to this one
515 are also affected: :meth:`peek`, :meth:`download` and
516 :meth:`get_contents`.
517 """
518 kwargs = {'stream': True}
519 if isinstance(self, LibraryDataset):
520 kwargs['params'] = {'ld_ids%5B%5D': self.id}
521 r = self.gi.gi.make_get_request(self._stream_url, **kwargs)
522 if isinstance(self, LibraryDataset) and r.status_code == 500:
523 # compatibility with older Galaxy releases
524 kwargs['params'] = {'ldda_ids%5B%5D': self.id}
525 r = self.gi.gi.make_get_request(self._stream_url, **kwargs)
526 r.raise_for_status()
527 return r.iter_content(chunk_size) # FIXME: client can't close r
528
529 def peek(self, chunk_size=bioblend.CHUNK_SIZE):
530 """
531 Open dataset for reading and return the first chunk.
532
533 See :meth:`.get_stream` for param info.
534 """
535 try:
536 return next(self.get_stream(chunk_size=chunk_size))
537 except StopIteration:
538 return b''
539
540 def download(self, file_object, chunk_size=bioblend.CHUNK_SIZE):
541 """
542 Open dataset for reading and save its contents to ``file_object``.
543
544 :type file_object: file
545 :param file_object: output file object
546
547 See :meth:`.get_stream` for info on other params.
548 """
549 for chunk in self.get_stream(chunk_size=chunk_size):
550 file_object.write(chunk)
551
552 def get_contents(self, chunk_size=bioblend.CHUNK_SIZE):
553 """
554 Open dataset for reading and return its **full** contents.
555
556 See :meth:`.get_stream` for param info.
557 """
558 return b''.join(self.get_stream(chunk_size=chunk_size))
559
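# Streaming sketch (assumes `hda` is a dataset in the 'ok' state):
#
#   with open('out.dat', 'wb') as f:
#       hda.download(f)                  # stream to file
#   head = hda.peek(chunk_size=1024)     # first chunk, as bytes
#   data = hda.get_contents()            # whole dataset, in memory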
560 def refresh(self):
561 """
562 Re-fetch the attributes pertaining to this object.
563
564 Returns: self
565 """
566 gi_client = getattr(self.gi.gi, self.container.API_MODULE)
567 ds_dict = gi_client.show_dataset(self.container.id, self.id)
568 self.__init__(ds_dict, self.container, self.gi)
569 return self
570
571 def wait(self, polling_interval=POLLING_INTERVAL, break_on_error=True):
572 """
573 Wait for this dataset to come out of the pending states.
574
575 :type polling_interval: float
576 :param polling_interval: polling interval in seconds
577
578 :type break_on_error: bool
579 :param break_on_error: if ``True``, raise a RuntimeError exception if
580 the dataset ends in the 'error' state.
581
582 .. warning::
583
584 This is a blocking operation that can take a very long time. Also,
585 note that this method does not return anything; however, this dataset
586 is refreshed (possibly multiple times) during the execution.
587 """
588 self.gi._wait_datasets([self], polling_interval=polling_interval,
589 break_on_error=break_on_error)
590
591
592 class HistoryDatasetAssociation(Dataset):
593 """
594 Maps to a Galaxy ``HistoryDatasetAssociation``.
595 """
596 BASE_ATTRS = Dataset.BASE_ATTRS + ('tags', 'visible')
597 SRC = 'hda'
598
599 def __init__(self, ds_dict, container, gi=None):
600 super(HistoryDatasetAssociation, self).__init__(
601 ds_dict, container, gi=gi)
602
603 @property
604 def gi_module(self):
605 return self.gi.histories
606
607 @property
608 def _stream_url(self):
609 base_url = self.gi.gi._make_url(
610 self.gi.gi.histories, module_id=self.container.id, contents=True)
611 return "%s/%s/display" % (base_url, self.id)
612
613 def delete(self):
614 """
615 Delete this dataset.
616 """
617 self.gi.gi.histories.delete_dataset(self.container.id, self.id)
618 self.container.refresh()
619 self.refresh()
620
621
622 class LibRelatedDataset(Dataset):
623 """
624 Base class for LibraryDatasetDatasetAssociation and LibraryDataset classes.
625 """
626
627 def __init__(self, ds_dict, container, gi=None):
628 super(LibRelatedDataset, self).__init__(ds_dict, container, gi=gi)
629
630 @property
631 def gi_module(self):
632 return self.gi.libraries
633
634 @property
635 def _stream_url(self):
636 base_url = self.gi.gi._make_url(self.gi.gi.libraries)
637 return "%s/datasets/download/uncompressed" % base_url
638
639
640 class LibraryDatasetDatasetAssociation(LibRelatedDataset):
641 """
642 Maps to a Galaxy ``LibraryDatasetDatasetAssociation``.
643 """
644 SRC = 'ldda'
645
646
647 class LibraryDataset(LibRelatedDataset):
648 """
649 Maps to a Galaxy ``LibraryDataset``.
650 """
651 SRC = 'ld'
652
653 def delete(self, purged=False):
654 """
655 Delete this library dataset.
656
657 :type purged: bool
658 :param purged: if ``True``, also purge (permanently delete) the dataset
659 """
660 self.gi.gi.libraries.delete_library_dataset(
661 self.container.id, self.id, purged=purged)
662 self.container.refresh()
663 self.refresh()
664
665
666 @six.add_metaclass(abc.ABCMeta)
667 class ContentInfo(Wrapper):
668 """
669 Instances of this class wrap dictionaries obtained by getting
670 ``/api/{histories,libraries}/<ID>/contents`` from Galaxy.
671 """
672 BASE_ATTRS = Wrapper.BASE_ATTRS + ('type',)
673
674 @abc.abstractmethod
675 def __init__(self, info_dict, gi=None):
676 super(ContentInfo, self).__init__(info_dict, gi=gi)
677
678
679 class LibraryContentInfo(ContentInfo):
680 """
681 Instances of this class wrap dictionaries obtained by getting
682 ``/api/libraries/<ID>/contents`` from Galaxy.
683 """
684 def __init__(self, info_dict, gi=None):
685 super(LibraryContentInfo, self).__init__(info_dict, gi=gi)
686
687 @property
688 def gi_module(self):
689 return self.gi.libraries
690
691
692 class HistoryContentInfo(ContentInfo):
693 """
694 Instances of this class wrap dictionaries obtained by getting
695 ``/api/histories/<ID>/contents`` from Galaxy.
696 """
697 BASE_ATTRS = ContentInfo.BASE_ATTRS + ('deleted', 'state', 'visible')
698
699 def __init__(self, info_dict, gi=None):
700 super(HistoryContentInfo, self).__init__(info_dict, gi=gi)
701
702 @property
703 def gi_module(self):
704 return self.gi.histories
705
706
707 @six.add_metaclass(abc.ABCMeta)
708 class DatasetContainer(Wrapper):
709 """
710 Abstract base class for dataset containers (histories and libraries).
711 """
712 BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',)
713
714 @abc.abstractmethod
715 def __init__(self, c_dict, content_infos=None, gi=None):
716 """
717 :type content_infos: list of :class:`ContentInfo`
718 :param content_infos: info objects for the container's contents
719 """
720 super(DatasetContainer, self).__init__(c_dict, gi=gi)
721 if content_infos is None:
722 content_infos = []
723 object.__setattr__(self, 'content_infos', content_infos)
724
725 @property
726 def dataset_ids(self):
727 """
728 Return the ids of the contained datasets.
729 """
730 return [_.id for _ in self.content_infos if _.type == 'file']
731
732 def preview(self):
733 getf = self.gi_module.get_previews
734 # self.state could be stale: check both regular and deleted containers
735 try:
736 p = [_ for _ in getf() if _.id == self.id][0]
737 except IndexError:
738 try:
739 p = [_ for _ in getf(deleted=True) if _.id == self.id][0]
740 except IndexError:
741 raise ValueError('no object for id %s' % self.id)
742 return p
743
744 def refresh(self):
745 """
746 Re-fetch the attributes pertaining to this object.
747
748 Returns: self
749 """
750 fresh = self.gi_module.get(self.id)
751 self.__init__(
752 fresh.wrapped, content_infos=fresh.content_infos, gi=self.gi)
753 return self
754
755 def get_dataset(self, ds_id):
756 """
757 Retrieve the dataset corresponding to the given id.
758
759 :type ds_id: str
760 :param ds_id: dataset id
761
762 :rtype: :class:`~.HistoryDatasetAssociation` or
763 :class:`~.LibraryDataset`
764 :return: the dataset corresponding to ``ds_id``
765 """
766 gi_client = getattr(self.gi.gi, self.API_MODULE)
767 ds_dict = gi_client.show_dataset(self.id, ds_id)
768 return self.DS_TYPE(ds_dict, self, gi=self.gi)
769
770 def get_datasets(self, name=None):
771 """
772 Get all datasets contained inside this dataset container.
773
774 :type name: str
775 :param name: return only datasets with this name
776
777 :rtype: list of :class:`~.HistoryDatasetAssociation` or list of
778 :class:`~.LibraryDataset`
779 :return: datasets with the given name contained inside this
780 container
781
782 .. note::
783
784 when filtering library datasets by name, specify their full
785 paths starting from the library's root folder, e.g.,
786 ``/seqdata/reads.fastq``. Full paths are available through
787 the ``content_infos`` attribute of
788 :class:`~.Library` objects.
789 """
790 if name is None:
791 ds_ids = self.dataset_ids
792 else:
793 ds_ids = [_.id for _ in self.content_infos if _.name == name]
794 return [self.get_dataset(_) for _ in ds_ids]
795
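# Filtering sketch: history datasets are matched by plain name, library
# datasets by their full path from the root folder:
#
#   hist.get_datasets(name='reads.fastq')
#   lib.get_datasets(name='/seqdata/reads.fastq')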
796
797 class History(DatasetContainer):
798 """
799 Maps to a Galaxy history.
800 """
801 BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('annotation', 'state', 'state_ids', 'state_details', 'tags')
802 DS_TYPE = HistoryDatasetAssociation
803 CONTENT_INFO_TYPE = HistoryContentInfo
804 API_MODULE = 'histories'
805
806 def __init__(self, hist_dict, content_infos=None, gi=None):
807 super(History, self).__init__(
808 hist_dict, content_infos=content_infos, gi=gi)
809
810 @property
811 def gi_module(self):
812 return self.gi.histories
813
814 def update(self, name=None, annotation=None, **kwds):
815 """
816 Update history metadata information. Some of the attributes that can be
817 modified are documented below.
818
819 :type name: str
820 :param name: Replace history name with the given string
821
822 :type annotation: str
823 :param annotation: Replace history annotation with the given string
824
825 :type deleted: bool
826 :param deleted: Mark or unmark history as deleted
827
828 :type published: bool
829 :param published: Mark or unmark history as published
830
831 :type importable: bool
832 :param importable: Mark or unmark history as importable
833
834 :type tags: list
835 :param tags: Replace history tags with the given list
836 """
837 # TODO: wouldn't it be better if name and annotation were attributes?
838 # TODO: do we need to ensure the attributes of `self` are the same as
839 # the ones returned by the call to `update_history` below?
840 res = self.gi.gi.histories.update_history(
841 self.id, name=name, annotation=annotation, **kwds)
842 if res != http_client.OK:
843 raise RuntimeError('failed to update history')
844 self.refresh()
845 return self
846
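# Update sketch: extra kwds are forwarded to
# HistoryClient.update_history, e.g.:
#
#   hist.update(name='QC run', annotation='trimmed reads', tags=['qc'])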
847 def delete(self, purge=False):
848 """
849 Delete this history.
850
851 :type purge: bool
852 :param purge: if ``True``, also purge (permanently delete) the history
853
854 .. note::
855 For the purge option to work, the Galaxy instance must have the
856 ``allow_user_dataset_purge`` option set to ``True`` in the
857 ``config/galaxy.ini`` configuration file.
858 """
859 self.gi.histories.delete(id_=self.id, purge=purge)
860 self.unmap()
861
862 def import_dataset(self, lds):
863 """
864 Import a dataset into the history from a library.
865
866 :type lds: :class:`~.LibraryDataset`
867 :param lds: the library dataset to import
868
869 :rtype: :class:`~.HistoryDatasetAssociation`
870 :return: the imported history dataset
871 """
872 if not self.is_mapped:
873 raise RuntimeError('history is not mapped to a Galaxy object')
874 if not isinstance(lds, LibraryDataset):
875 raise TypeError('lds is not a LibraryDataset')
876 res = self.gi.gi.histories.upload_dataset_from_library(self.id, lds.id)
877 if not isinstance(res, collections.Mapping):
878 raise RuntimeError(
879 'upload_dataset_from_library: unexpected reply: %r' % res)
880 self.refresh()
881 return self.get_dataset(res['id'])
882
883 def upload_file(self, path, **kwargs):
884 """
885 Upload the file specified by path to this history.
886
887 :type path: str
888 :param path: path of the file to upload
889
890 See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for
891 the optional parameters.
892
893 :rtype: :class:`~.HistoryDatasetAssociation`
894 :return: the uploaded dataset
895 """
896 out_dict = self.gi.gi.tools.upload_file(path, self.id, **kwargs)
897 self.refresh()
898 return self.get_dataset(out_dict['outputs'][0]['id'])
899
900 upload_dataset = upload_file
901
902 def paste_content(self, content, **kwargs):
903 """
904 Upload a string to a new dataset in this history.
905
906 :type content: str
907 :param content: content of the new dataset to upload
908
909 See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for
910 the optional parameters (except file_name).
911
912 :rtype: :class:`~.HistoryDatasetAssociation`
913 :return: the uploaded dataset
914 """
915 out_dict = self.gi.gi.tools.paste_content(content, self.id, **kwargs)
916 self.refresh()
917 return self.get_dataset(out_dict['outputs'][0]['id'])
918
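# Upload sketch (assumes a reachable Galaxy instance and a local file;
# file_type is forwarded to ToolClient.upload_file):
#
#   hda1 = hist.upload_file('/tmp/reads.fastq', file_type='fastq')
#   hda2 = hist.paste_content('a\tb\nc\td\n', file_type='tabular')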
919 def export(self, gzip=True, include_hidden=False, include_deleted=False,
920 wait=False):
921 """
922 Start a job to create an export archive for this history. See
923 :meth:`~bioblend.galaxy.histories.HistoryClient.export_history`
924 for parameter and return value info.
925 """
926 return self.gi.gi.histories.export_history(
927 self.id,
928 gzip=gzip,
929 include_hidden=include_hidden,
930 include_deleted=include_deleted,
931 wait=wait)
932
933 def download(self, jeha_id, outf, chunk_size=bioblend.CHUNK_SIZE):
934 """
935 Download an export archive for this history. Use :meth:`export`
936 to create an export and get the required ``jeha_id``. See
937 :meth:`~bioblend.galaxy.histories.HistoryClient.download_history`
938 for parameter and return value info.
939 """
940 return self.gi.gi.histories.download_history(
941 self.id, jeha_id, outf, chunk_size=chunk_size)
942
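# Export-and-download sketch: with wait=True, export() blocks until the
# archive is ready and returns a usable jeha_id:
#
#   jeha_id = hist.export(wait=True)
#   with open('history.tar.gz', 'wb') as f:
#       hist.download(jeha_id, f)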
943
944 class Library(DatasetContainer):
945 """
946 Maps to a Galaxy library.
947 """
948 BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('description', 'synopsis')
949 DS_TYPE = LibraryDataset
950 CONTENT_INFO_TYPE = LibraryContentInfo
951 API_MODULE = 'libraries'
952
953 def __init__(self, lib_dict, content_infos=None, gi=None):
954 super(Library, self).__init__(
955 lib_dict, content_infos=content_infos, gi=gi)
956
957 @property
958 def gi_module(self):
959 return self.gi.libraries
960
961 @property
962 def folder_ids(self):
963 """
964 Return the ids of the contained folders.
965 """
966 return [_.id for _ in self.content_infos if _.type == 'folder']
967
968 def delete(self):
969 """
970 Delete this library.
971 """
972 self.gi.libraries.delete(id_=self.id)
973 self.unmap()
974
975 def __pre_upload(self, folder):
976 """
977 Return the id of the given folder, after sanity checking.
978 """
979 if not self.is_mapped:
980 raise RuntimeError('library is not mapped to a Galaxy object')
981 return None if folder is None else folder.id
982
983 def upload_data(self, data, folder=None, **kwargs):
984 """
985 Upload data to this library.
986
987 :type data: str
988 :param data: dataset contents
989
990 :type folder: :class:`~.Folder`
991 :param folder: a folder object, or ``None`` to upload to the root folder
992
993 :rtype: :class:`~.LibraryDataset`
994 :return: the dataset object that represents the uploaded content
995
996 Optional keyword arguments: ``file_type``, ``dbkey``.
997 """
998 fid = self.__pre_upload(folder)
999 res = self.gi.gi.libraries.upload_file_contents(
1000 self.id, data, folder_id=fid, **kwargs)
1001 self.refresh()
1002 return self.get_dataset(res[0]['id'])
1003
1004 def upload_from_url(self, url, folder=None, **kwargs):
1005 """
1006 Upload data to this library from the given URL.
1007
1008 :type url: str
1009 :param url: URL from which data should be read
1010
1011 See :meth:`.upload_data` for info on other params.
1012 """
1013 fid = self.__pre_upload(folder)
1014 res = self.gi.gi.libraries.upload_file_from_url(
1015 self.id, url, folder_id=fid, **kwargs)
1016 self.refresh()
1017 return self.get_dataset(res[0]['id'])
1018
1019 def upload_from_local(self, path, folder=None, **kwargs):
1020 """
1021 Upload data to this library from a local file.
1022
1023 :type path: str
1024 :param path: local file path from which data should be read
1025
1026 See :meth:`.upload_data` for info on other params.
1027 """
1028 fid = self.__pre_upload(folder)
1029 res = self.gi.gi.libraries.upload_file_from_local_path(
1030 self.id, path, folder_id=fid, **kwargs)
1031 self.refresh()
1032 return self.get_dataset(res[0]['id'])
1033
1034 def upload_from_galaxy_fs(self, paths, folder=None, link_data_only=None, **kwargs):
1035 """
1036 Upload data to this library from filesystem paths on the server.
1037
1038 .. note::
1039 For this method to work, the Galaxy instance must have the
1040 ``allow_library_path_paste`` option set to ``True`` in the
1041 ``config/galaxy.ini`` configuration file.
1042
1043 :type paths: str or :class:`~collections.Iterable` of str
1044 :param paths: server-side file paths from which data should be read
1045
1046 :type link_data_only: str
1047 :param link_data_only: either 'copy_files' (default) or
1048 'link_to_files'. Setting to 'link_to_files' symlinks instead of
1049 copying the files
1050
1051 :rtype: list of :class:`~.LibraryDataset`
1052 :return: the dataset objects that represent the uploaded content
1053
1054 See :meth:`.upload_data` for info on other params.
1055 """
1056 fid = self.__pre_upload(folder)
1057 if isinstance(paths, six.string_types):
1058 paths = (paths,)
1059 paths = '\n'.join(paths)
1060 res = self.gi.gi.libraries.upload_from_galaxy_filesystem(
1061 self.id, paths, folder_id=fid, link_data_only=link_data_only,
1062 **kwargs)
1063 if res is None:
1064 raise RuntimeError('upload_from_galaxy_filesystem: no reply')
1065 if not isinstance(res, collections.Sequence):
1066 raise RuntimeError(
1067 'upload_from_galaxy_filesystem: unexpected reply: %r' % res)
1068 new_datasets = [
1069 self.get_dataset(ds_info['id']) for ds_info in res
1070 ]
1071 self.refresh()
1072 return new_datasets
1073
1074 def copy_from_dataset(self, hda, folder=None, message=''):
1075 """
1076 Copy a history dataset into this library.
1077
1078 :type hda: :class:`~.HistoryDatasetAssociation`
1079 :param hda: history dataset to copy into the library
1080
1081 See :meth:`.upload_data` for info on other params.
1082 """
1083 fid = self.__pre_upload(folder)
1084 res = self.gi.gi.libraries.copy_from_dataset(
1085 self.id, hda.id, folder_id=fid, message=message)
1086 self.refresh()
1087 return self.get_dataset(res['library_dataset_id'])
1088
1089 def create_folder(self, name, description=None, base_folder=None):
1090 """
1091 Create a folder in this library.
1092
1093 :type name: str
1094 :param name: folder name
1095
1096 :type description: str
1097 :param description: optional folder description
1098
1099 :type base_folder: :class:`~.Folder`
1100 :param base_folder: parent folder, or ``None`` to create in the root
1101 folder
1102
1103 :rtype: :class:`~.Folder`
1104 :return: the folder just created
1105 """
1106 bfid = None if base_folder is None else base_folder.id
1107 res = self.gi.gi.libraries.create_folder(
1108 self.id, name, description=description, base_folder_id=bfid)
1109 self.refresh()
1110 return self.get_folder(res[0]['id'])
1111
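# Library layout sketch: create a subfolder, then upload into it:
#
#   folder = lib.create_folder('seqdata', description='raw reads')
#   lds = lib.upload_from_local('/tmp/reads.fastq', folder=folder)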
1112 def get_folder(self, f_id):
1113 """
1114 Retrieve the folder corresponding to the given id.
1115
1116 :rtype: :class:`~.Folder`
1117 :return: the folder corresponding to ``f_id``
1118 """
1119 f_dict = self.gi.gi.libraries.show_folder(self.id, f_id)
1120 return Folder(f_dict, self, gi=self.gi)
1121
1122 @property
1123 def root_folder(self):
1124 """
1125 The root folder of this library.
1126
1127 :rtype: :class:`~.Folder`
1128 :return: the root folder of this library
1129 """
1130 return self.get_folder(self.gi.gi.libraries._get_root_folder_id(self.id))
1131
1132
1133 class Folder(Wrapper):
1134 """
1135 Maps to a folder in a Galaxy library.
1136 """
1137 BASE_ATTRS = Wrapper.BASE_ATTRS + ('description', 'deleted', 'item_count')
1138
1139 def __init__(self, f_dict, container, gi=None):
1140 super(Folder, self).__init__(f_dict, gi=gi)
1141 object.__setattr__(self, 'container', container)
1142
1143 @property
1144 def parent(self):
1145 """
1146 The parent folder of this folder. The parent of the root folder is
1147 ``None``.
1148
1149 :rtype: :class:`~.Folder`
1150 :return: the parent of this folder
1151 """
1152 if self._cached_parent is None:
1153 object.__setattr__(self,
1154 '_cached_parent',
1155 self._get_parent())
1156 return self._cached_parent
1157
1158 def _get_parent(self):
1159 """
1160 Return the parent folder of this folder.
1161 """
1162 # Galaxy release_13.04 and earlier do not include parent_id in the folder
1163 # dictionary; support could be added by searching for the folder with the
1164 # correct name
1165 if 'parent_id' not in self.wrapped:
1166 raise NotImplementedError('This method has not been implemented for Galaxy release_13.04 and earlier')
1167 parent_id = self.wrapped['parent_id']
1168 if parent_id is None:
1169 return None
1170 # Galaxy from release_14.02 to release_15.01 returns a dummy parent_id
1171 # for the root folder instead of None, so check if this is the root
1172 if self.id == self.gi.gi.libraries._get_root_folder_id(self.container.id):
1173 return None
1174 # Galaxy release_13.11 and earlier return a parent_id without the
1175 # initial 'F'
1176 if not parent_id.startswith('F'):
1177 parent_id = 'F' + parent_id
1178 return self.container.get_folder(parent_id)
1179
1180 @property
1181 def gi_module(self):
1182 return self.gi.libraries
1183
1184 @property
1185 def container_id(self):
1186 """
1187 Deprecated property.
1188
1189 Id of the folder container. Use :attr:`.container.id` instead.
1190 """
1191 return self.container.id
1192
1193 def refresh(self):
1194 """
1195 Re-fetch the attributes pertaining to this object.
1196
1197 Returns: self
1198 """
1199 f_dict = self.gi.gi.libraries.show_folder(self.container.id, self.id)
1200 self.__init__(f_dict, self.container, gi=self.gi)
1201 return self
1202
1203
1204 class Tool(Wrapper):
1205 """
1206 Maps to a Galaxy tool.
1207 """
1208 BASE_ATTRS = Wrapper.BASE_ATTRS + ('version',)
1209 POLLING_INTERVAL = 10 # for output state monitoring
1210
1211 def __init__(self, t_dict, gi=None):
1212 super(Tool, self).__init__(t_dict, gi=gi)
1213
1214 @property
1215 def gi_module(self):
1216 return self.gi.tools
1217
1218 def run(self, inputs, history, wait=False,
1219 polling_interval=POLLING_INTERVAL):
1220 """
1221 Execute this tool in the given history with inputs from dict
1222 ``inputs``.
1223
1224 :type inputs: dict
1225 :param inputs: dictionary of input datasets and parameters for
1226 the tool (see below)
1227
1228 :type history: :class:`History`
1229 :param history: the history in which to execute the tool
1230
1231 :type wait: bool
1232 :param wait: whether to wait while the returned datasets are
1233 in a pending state
1234
1235 :type polling_interval: float
1236 :param polling_interval: polling interval in seconds
1237
1238 :rtype: list of :class:`HistoryDatasetAssociation`
1239 :return: list of output datasets
1240
1241 The ``inputs`` dict should contain input datasets and parameters
1242 in the (largely undocumented) format used by the Galaxy API.
1243 Some examples can be found in `Galaxy's API test suite
1244 <https://bitbucket.org/galaxy/galaxy-central/src/tip/test/api/test_tools.py>`_.
1245 The value of an input dataset can also be a :class:`Dataset`
1246 object, which will be automatically converted to the needed
1247 format.
1248 """
1249 for k, v in six.iteritems(inputs):
1250 if isinstance(v, Dataset):
1251 inputs[k] = {'src': v.SRC, 'id': v.id}
1252 out_dict = self.gi.gi.tools.run_tool(history.id, self.id, inputs)
1253 outputs = [history.get_dataset(_['id']) for _ in out_dict['outputs']]
1254 if wait:
1255 self.gi._wait_datasets(outputs, polling_interval=polling_interval)
1256 return outputs
1257
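# Tool invocation sketch (the tool id and parameter names here are
# hypothetical; Dataset values are converted to {'src', 'id'} dicts
# automatically):
#
#   tool = gi.tools.get('Cut1')
#   outputs = tool.run({'input': hda, 'columnList': 'c1,c2'}, hist,
#                      wait=True)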
1258
1259 class Job(Wrapper):
1260 """
1261 Maps to a Galaxy job.
1262 """
1263 BASE_ATTRS = ('id', 'state')
1264
1265 def __init__(self, j_dict, gi=None):
1266 super(Job, self).__init__(j_dict, gi=gi)
1267
1268 @property
1269 def gi_module(self):
1270 return self.gi.jobs
1271
1272
1273 @six.add_metaclass(abc.ABCMeta)
1274 class Preview(Wrapper):
1275 """
1276 Abstract base class for Galaxy entity 'previews'.
1277
1278 Classes derived from this one model the short summaries returned
1279 by global getters such as ``/api/libraries``.
1280 """
1281 BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',)
1282
1283 @abc.abstractmethod
1284 def __init__(self, pw_dict, gi=None):
1285 super(Preview, self).__init__(pw_dict, gi=gi)
1286
1287
1288 class LibraryPreview(Preview):
1289 """
1290 Models Galaxy library 'previews'.
1291
1292 Instances of this class wrap dictionaries obtained by getting
1293 ``/api/libraries`` from Galaxy.
1294 """
1295 def __init__(self, pw_dict, gi=None):
1296 super(LibraryPreview, self).__init__(pw_dict, gi=gi)
1297
1298 @property
1299 def gi_module(self):
1300 return self.gi.libraries
1301
1302
1303 class HistoryPreview(Preview):
1304 """
1305 Models Galaxy history 'previews'.
1306
1307 Instances of this class wrap dictionaries obtained by getting
1308 ``/api/histories`` from Galaxy.
1309 """
1310 BASE_ATTRS = Preview.BASE_ATTRS + ('tags',)
1311
1312 def __init__(self, pw_dict, gi=None):
1313 super(HistoryPreview, self).__init__(pw_dict, gi=gi)
1314
1315 @property
1316 def gi_module(self):
1317 return self.gi.histories
1318
1319
1320 class WorkflowPreview(Preview):
1321 """
1322 Models Galaxy workflow 'previews'.
1323
1324 Instances of this class wrap dictionaries obtained by getting
1325 ``/api/workflows`` from Galaxy.
1326 """
1327 BASE_ATTRS = Preview.BASE_ATTRS + ('published', 'tags')
1328
1329 def __init__(self, pw_dict, gi=None):
1330 super(WorkflowPreview, self).__init__(pw_dict, gi=gi)
1331
1332 @property
1333 def gi_module(self):
1334 return self.gi.workflows
1335
1336
1337 class JobPreview(Preview):
1338 """
1339 Models Galaxy job 'previews'.
1340
1341 Instances of this class wrap dictionaries obtained by getting
1342 ``/api/jobs`` from Galaxy.
1343 """
1344 BASE_ATTRS = ('id', 'state')
1345
1346 def __init__(self, pw_dict, gi=None):
1347 super(JobPreview, self).__init__(pw_dict, gi=gi)
1348
1349 @property
1350 def gi_module(self):
1351 return self.gi.jobs