comparison jbrowse2.py @ 0:61add3f58f26 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 4fa86613193c985e0cb9a8fc795c56b8bc7b8532
author iuc
date Thu, 02 Oct 2025 10:19:44 +0000
1 #!/usr/bin/env python
2 import argparse
3 import csv
4 import datetime
5 import hashlib
6 import json
7 import logging
8 import os
9 import re
10 import shutil
11 import subprocess
12 import xml.etree.ElementTree as ET
13 from collections import defaultdict
14
15 import requests
16
17
18 logging.basicConfig(level=logging.DEBUG)
19 log = logging.getLogger("jbrowse")
20 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
21 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
22 GALAXY_INFRASTRUCTURE_URL = None
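# Reverse map of Galaxy's parameter sanitization escape codes: Galaxy replaces
# special characters in user input with these tokens, and this script restores
# them by replacing each value with its key (see e.g. process_annotations).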
23 mapped_chars = {
24 ">": "__gt__",
25 "<": "__lt__",
26 "'": "__sq__",
27 '"': "__dq__",
28 "[": "__ob__",
29 "]": "__cb__",
30 "{": "__oc__",
31 "}": "__cc__",
32 "@": "__at__",
33 "#": "__pd__",
34 "": "__cn__",
35 }
36
37
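# Recursively converts an ElementTree node into plain dicts/strings, e.g.
# <track cat="A"><label>x</label></track> becomes
# {"track": {"@cat": "A", "label": "x"}}: attributes get an "@" prefix, and
# text mixed with attributes or children is stored under "#text".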
38 def etree_to_dict(t):
39 if t is None:
40 return {}
41
42 d = {t.tag: {} if t.attrib else None}
43 children = list(t)
44 if children:
45 dd = defaultdict(list)
46 for dc in map(etree_to_dict, children):
47 for k, v in dc.items():
48 dd[k].append(v)
49 d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
50 if t.attrib:
51 d[t.tag].update(("@" + k, v) for k, v in t.attrib.items())
52 if t.text:
53 text = t.text.strip()
54 if children or t.attrib:
55 if text:
56 d[t.tag]["#text"] = text
57 else:
58 d[t.tag] = text
59 return d
60
61
62 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
63
64
65 def metadata_from_node(node):
66 metadata = {}
67
68 if len(node.findall("dataset")) == 1:
69
70 for key, value in node.findall("dataset")[0].attrib.items():
71 metadata[f"dataset_{key}"] = value
72
73 for key, value in node.findall("history")[0].attrib.items():
74 metadata[f"history_{key}"] = value
75
76 for key, value in node.findall("metadata")[0].attrib.items():
77 metadata[f"metadata_{key}"] = value
78
79 for key, value in node.findall("tool")[0].attrib.items():
80 metadata[f"tool_{key}"] = value
81
82 # Additional Mappings applied:
83 metadata[
84 "dataset_edam_format"
85 ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
86 metadata["dataset_edam_format"], metadata["dataset_file_ext"]
87 )
88 metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
89 metadata["history_user_email"]
90 )
91 metadata[
92 "history_display_name"
93 ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
94 galaxy=GALAXY_INFRASTRUCTURE_URL,
95 encoded_hist_id=metadata["history_id"],
96 hist_name=metadata["history_display_name"],
97 )
98 metadata[
99 "tool_tool"
100 ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
101 galaxy=GALAXY_INFRASTRUCTURE_URL,
102 encoded_id=metadata["dataset_id"],
103 tool_id=metadata["tool_tool_id"],
104 # tool_version=metadata['tool_tool_version'],
105 )
106
107 # Load additional metadata from a TSV file, if the user provided one
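# Expected format: two tab-separated columns per row, e.g.
# my_key<TAB>My value
# (keys are arbitrary; rows with any other column count are skipped below)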
108 bonus = node.findall("bonus")
109 if bonus and "src" in bonus[0].attrib and bonus[0].attrib["src"]:
110 with open(bonus[0].attrib["src"], "r") as bonus_tsv:
111 bonus_content = csv.reader(bonus_tsv, delimiter="\t", quotechar='"')
112 for row in bonus_content:
113 if len(row) == 2:
114 if row[0] in metadata:
115 log.warning(f"Overwriting existing metadata {row[0]} with value from bonus file {row[1]}")
116 metadata[row[0]] = row[1]
117 else:
118 log.warning(f"Skipping invalid bonus metadata line: {row}")
119
120 return metadata
121
122
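# Orchestrates the JBrowse2 instance: clones a JBrowse2 release into outdir,
# then drives the `jbrowse` CLI (add-assembly, add-track, add-track-json,
# text-index) and edits config.json directly for what the CLI does not cover.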
123 class JbrowseConnector(object):
124 def __init__(self, jbrowse, outdir, update):
125 self.jbrowse = jbrowse
126 self.outdir = outdir
127 self.update = update
128
129 self.tracksToIndex = {}
130
131 # Maps each assembly name to its default scaffold (also used to detect label collisions)
132 self.assembly_ids = {}
133
134 self.default_views = {}
135
136 self.plugins = []
137
138 self.use_synteny_viewer = False
139
140 self.synteny_tracks = []
141
142 self.clone_jbrowse(self.jbrowse, self.outdir)
143
144 # If upgrading, look at the existing data
145 self.check_existing(self.outdir)
146
147 def get_cwd(self, cwd):
148 if cwd:
149 return self.outdir
150 else:
151 return os.getcwd()
153
154 def subprocess_check_call(self, command, output=None, cwd=True):
155 if output:
156 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)} > {output.name}")
157 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output)
158 else:
159 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}")
160 subprocess.check_call(command, cwd=self.get_cwd(cwd))
161
162 def subprocess_popen(self, command, cwd=True):
163 log.debug(f"cd {self.get_cwd(cwd)} && {command}")
164 p = subprocess.Popen(
165 command,
166 cwd=self.get_cwd(cwd),
167 shell=True,
168 stdin=subprocess.PIPE,
169 stdout=subprocess.PIPE,
170 stderr=subprocess.PIPE,
171 )
172 output, err = p.communicate()
173 retcode = p.returncode
174 if retcode != 0:
175 log.error(f"cd {self.get_cwd(cwd)} && {command}")
176 log.error(output)
177 log.error(err)
178 raise RuntimeError(f"Command failed with exit code {retcode}")
179
180 def subprocess_check_output(self, command, cwd=True):
181 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}")
182 return subprocess.check_output(command, cwd=self.get_cwd(cwd))
183
184 def symlink_or_copy(self, src, dest):
185 # Used to support symlinking in jbrowse1; in jbrowse2, prefer remote URIs instead
186 cmd = ["cp", src, dest]
187
188 return self.subprocess_check_call(cmd)
189
190 def _prepare_track_style(self, xml_conf):
191 style_data = {
192 "type": "LinearBasicDisplay", # No ideal default, but should be overwritten anyway
193 }
194
195 if "display" in xml_conf["style"]:
196 style_data["type"] = xml_conf["style"]["display"]
197
198 style_data["displayId"] = f"{xml_conf['label']}_{style_data['type']}"
199
200 style_data.update(self._prepare_renderer_config(style_data["type"], xml_conf["style"]))
201
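# Example (sketch): a style of {"display": "LinearWiggleDisplay"} on a track
# labelled "mytrack" yields {"displays": [{"type": "LinearWiggleDisplay",
# "displayId": "mytrack_LinearWiggleDisplay", "defaultRendering": "xyplot"}]}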
202 return {"displays": [style_data]}
203
204 def _prepare_renderer_config(self, display_type, xml_conf):
205
206 style_data = {}
207
208 # if display_type in ("LinearBasicDisplay", "LinearVariantDisplay"):
209 # TODO LinearVariantDisplay does not understand these options when written in config.json
210 if display_type in ("LinearBasicDisplay",):
211
212 # Doc: https://jbrowse.org/jb2/docs/config/svgfeaturerenderer/
213 style_data["renderer"] = {
214 "type": "SvgFeatureRenderer",
215 "showLabels": xml_conf.get("show_labels", True),
216 "showDescriptions": xml_conf.get("show_descriptions", True),
217 "labels": {
218 "name": xml_conf.get("labels_name", "jexl:get(feature,'name') || get(feature,'id')"),
219 "description": xml_conf.get("descriptions_name", "jexl:get(feature,'note') || get(feature,'description')")
220 },
221 "displayMode": xml_conf.get("display_mode", "normal"),
222 "maxHeight": xml_conf.get("max_height", 1200),
223 }
224
225 elif display_type == "LinearArcDisplay":
226
227 # Doc: https://jbrowse.org/jb2/docs/config/arcrenderer/
228 style_data["renderer"] = {
229 "type": "ArcRenderer",
230 "label": xml_conf.get("labels_name", "jexl:get(feature,'score')"),
231 "displayMode": xml_conf.get("display_mode", "arcs"),
232 }
233
234 elif display_type == "LinearWiggleDisplay":
235
236 wig_renderer = xml_conf.get("renderer", "xyplot")
237 style_data["defaultRendering"] = wig_renderer
238
239 elif display_type == "MultiLinearWiggleDisplay":
240
241 wig_renderer = xml_conf.get("renderer", "multirowxy")
242 style_data["defaultRendering"] = wig_renderer
243
244 elif display_type == "LinearSNPCoverageDisplay":
245
246 # Does not work
247 # style_data["renderer"] = {
248 # "type": "SNPCoverageRenderer",
249 # "displayCrossHatches": xml_conf.get("display_cross_hatches", True),
250 # }
251
252 style_data["scaleType"] = xml_conf.get("scale_type", "linear")
253 if "min_score" in xml_conf:
254 style_data["minScore"] = xml_conf["min_score"]
255
256 if "max_score" in xml_conf:
257 style_data["maxScore"] = xml_conf["max_score"]
258
259 # Doc: https://jbrowse.org/jb2/docs/config/snpcoveragerenderer/
260
261 return style_data
262
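# The formatDetails entries below are jexl callbacks that let JBrowse2
# customize the feature details panel; Galaxy-sanitized characters inside the
# jexl expressions are restored via mapped_chars before writing the config.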
263 def _prepare_format_details(self, xml_conf):
264 formatDetails = {
265 }
266
267 if "feature" in xml_conf["formatdetails"]:
268 feat_jexl = xml_conf["formatdetails"]["feature"]
269 for key, value in mapped_chars.items():
270 feat_jexl = feat_jexl.replace(value, key)
271 formatDetails["feature"] = feat_jexl
272
273 if "subfeature" in xml_conf["formatdetails"]:
274 sfeat_jexl = xml_conf["formatdetails"]["subfeature"]
275 for key, value in mapped_chars.items():
276 sfeat_jexl = sfeat_jexl.replace(value, key)
277 formatDetails["subfeatures"] = sfeat_jexl
278
279 if "depth" in xml_conf["formatdetails"]:
280 formatDetails["depth"] = int(xml_conf["formatdetails"]["depth"])
281
282 return {"formatDetails": formatDetails}
283
284 def _prepare_track_metadata(self, xml_conf):
285 metadata = xml_conf["metadata"]
289
290 return {"metadata": metadata}
291
292 def check_existing(self, destination):
293 existing = os.path.join(destination, "config.json")
294 if os.path.exists(existing):
295 with open(existing, "r") as existing_conf:
296 conf = json.load(existing_conf)
297 if "assemblies" in conf:
298 for assembly in conf["assemblies"]:
299 if "name" in assembly:
300
301 # Look for a default scaffold
302 default_seq = None
303 if 'defaultSession' in conf and 'views' in conf['defaultSession']:
304 for view in conf['defaultSession']['views']:
305 if 'init' in view and 'assembly' in view['init'] and 'loc' in view['init']:
306 if view['init']['assembly'] == assembly["name"]:
307 default_seq = view['init']['loc'].split(":")[0]
308 if "views" in view:
309 subviews = view["views"]
310 for subview in subviews:
311 if 'init' in subview and 'assembly' in subview['init'] and 'loc' in subview['init']:
312 if subview['init']['assembly'] == assembly["name"]:
313 default_seq = subview['init']['loc'].split(":")[0]
314
315 self.assembly_ids[assembly["name"]] = default_seq
316
317 def _load_old_genome_views(self):
318
319 views = {}
320
321 config_path = os.path.join(self.outdir, "config.json")
322 with open(config_path, "r") as config_file:
323 config_json = json.load(config_file)
324
325 # Find default genome (non-synteny) views existing from a previous jbrowse dataset
326 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
327 for view in config_json['defaultSession']['views']:
328 if view['type'] != "LinearSyntenyView":
329 if 'init' in view and 'assembly' in view['init']:
330 views[view['init']['assembly']] = view
331
332 return views
333
334 def _load_old_synteny_views(self):
335
336 views = []
337
338 config_path = os.path.join(self.outdir, "config.json")
339 with open(config_path, "r") as config_file:
340 config_json = json.load(config_file)
341
342 # Find default synteny views existing from a previous jbrowse dataset
343 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
344 for view in config_json['defaultSession']['views']:
345 if view['type'] == "LinearSyntenyView":
346 views.append(view)
347
348 return views
349
350 def add_assembly(self, path, label, is_remote=False, cytobands=None, ref_name_aliases=None):
351
352 if not is_remote:
353 # Find a non-existing filename for the new genome
354 # (to avoid collision when upgrading an existing instance)
355 rel_seq_path = os.path.join("data", label)
356 seq_path = os.path.join(self.outdir, rel_seq_path)
357 fn_try = 1
358 while (
359 os.path.exists(seq_path + ".fasta")
360 or os.path.exists(seq_path + ".fasta.gz")
361 or os.path.exists(seq_path + ".fasta.gz.fai")
362 or os.path.exists(seq_path + ".fasta.gz.gzi")
363 ):
364 rel_seq_path = os.path.join("data", f"{label}{fn_try}")
365 seq_path = os.path.join(self.outdir, rel_seq_path)
366 fn_try += 1
367
368 # Check if the assembly already exists from a previous run (--update mode)
369 if self.update:
370
371 config_path = os.path.join(self.outdir, "config.json")
372 with open(config_path, "r") as config_file:
373 config_json = json.load(config_file)
374
375 for asby in config_json['assemblies']:
376 if asby['name'] == label:
377
378 # Find default views existing for this assembly
379 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
380 for view in config_json['defaultSession']['views']:
381 if 'init' in view and 'assembly' in view['init']:
382 if view['init']['assembly'] == label:
383
384 log.info("Found existing assembly from existing JBrowse2 instance, preserving it")
385
386 self.default_views[view['init']['assembly']] = view
387
388 return label
389
390 # Copy ref alias file if any
391 if ref_name_aliases:
392 copied_ref_name_aliases = seq_path + ".aliases"
393 shutil.copy(ref_name_aliases, copied_ref_name_aliases)
394 copied_ref_name_aliases = rel_seq_path + ".aliases"
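# Reassigned on purpose to the outdir-relative path: this is the value that
# must end up in config.json (same trick for the cytobands file below)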
395
396 # Copy cytobands file if any
397 if cytobands:
398 copied_cytobands = seq_path + ".cytobands"
399 shutil.copy(cytobands, copied_cytobands)
400 copied_cytobands = rel_seq_path + ".cytobands"
401
402 # Find a non-existing label for the new genome
403 # (to avoid collision when upgrading an existing instance)
404 lab_try = 1
405 uniq_label = label
406 while uniq_label in self.assembly_ids:
407 uniq_label = label + str(lab_try)
408 lab_try += 1
409
410 if is_remote:
411
412 # Find a default scaffold to display
413 with requests.get(path + ".fai", stream=True) as response:
414 response.raise_for_status()
415 first_seq = next(response.iter_lines())
416 first_seq = first_seq.decode("utf-8").split('\t')[0]
417
418 self.assembly_ids[uniq_label] = first_seq
419
420 # We assume the index URLs are simply the FASTA URL suffixed with .fai and .gzi.
421 cmd_jb = [
422 "jbrowse",
423 "add-assembly",
424 "--name",
425 uniq_label,
426 "--type",
427 "bgzipFasta",
428 "--out",
429 self.outdir,
430 "--skipCheck",
431 ]
432
433 if ref_name_aliases:
434 cmd_jb.extend([
435 "--refNameAliases",
436 copied_ref_name_aliases,
437 ])
438
439 cmd_jb.append(path) # Path is an url in remote mode
440
441 self.subprocess_check_call(cmd_jb)
442 else:
443 # Find a default scaffold to display
444 with open(path, "r") as fa_handle:
445 fa_header = fa_handle.readline()[1:].strip().split(" ")[0]
446
447 self.assembly_ids[uniq_label] = fa_header
448
449 copied_genome = seq_path + ".fasta"
450 shutil.copy(path, copied_genome)
451
452 # Compress with bgzip
453 cmd = ["bgzip", copied_genome]
454 self.subprocess_check_call(cmd)
455
456 # FAI Index
457 cmd = ["samtools", "faidx", copied_genome + ".gz"]
458 self.subprocess_check_call(cmd)
459
460 cmd_jb = [
461 "jbrowse",
462 "add-assembly",
463 "--load",
464 "inPlace",
465 "--name",
466 uniq_label,
467 "--type",
468 "bgzipFasta",
469 "--out",
470 self.outdir,
471 "--skipCheck",
472 ]
473
474 if ref_name_aliases:
475 cmd_jb.extend([
476 "--refNameAliases",
477 copied_ref_name_aliases,
478 ])
479
480 cmd_jb.append(rel_seq_path + ".fasta.gz")
481
482 self.subprocess_check_call(cmd_jb)
483
484 if cytobands:
485 self.add_cytobands(uniq_label, copied_cytobands)
486
487 return uniq_label
488
489 def add_cytobands(self, assembly_name, cytobands_path):
490
491 config_path = os.path.join(self.outdir, "config.json")
492 with open(config_path, "r") as config_file:
493 config_json = json.load(config_file)
494
495 config_data = {}
496
497 config_data["cytobands"] = {
498 "adapter": {
499 "type": "CytobandAdapter",
500 "cytobandLocation": {
501 "uri": cytobands_path
502 }
503 }
504 }
505
506 filled_assemblies = []
507 for assembly in config_json["assemblies"]:
508 if assembly["name"] == assembly_name:
509 assembly.update(config_data)
510 filled_assemblies.append(assembly)
511 config_json["assemblies"] = filled_assemblies
512
513 with open(config_path, "w") as config_file:
514 json.dump(config_json, config_file, indent=2)
515
516 def text_index(self):
517
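# Sketch of the resulting call, assuming assembly "genome1" has tracks
# ["t1", "t2"] queued for indexing:
# jbrowse text-index --target <outdir> --assemblies genome1 --tracks t1,t2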
518 for ass in self.tracksToIndex:
519 tracks = self.tracksToIndex[ass]
520 args = [
521 "jbrowse",
522 "text-index",
523 "--target",
524 self.outdir,
525 "--assemblies",
526 ass,
527 ]
528
529 tracks = ",".join(tracks)
530 if tracks:
531 args += ["--tracks", tracks]
532
533 log.info(f"-----> Running text-index on assembly {ass} and tracks {tracks}")
534
535 # Only run index if we want to index at least one
536 # If --tracks is not specified, it will index everything
537 self.subprocess_check_call(args)
538
539 def add_gc_content(self, parent, trackData, **kwargs):
540
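# A GC content track has no data file of its own: it reuses the sequence
# adapter of the parent assembly (copied below from the existing config.json)
# so that JBrowse2 can compute GC percentage from the reference itself.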
541 adapter = {}
542 existing = os.path.join(self.outdir, "config.json")
543 if os.path.exists(existing):
544 with open(existing, "r") as existing_conf:
545 conf = json.load(existing_conf)
546 if "assemblies" in conf:
547 for assembly in conf["assemblies"]:
548 if assembly.get('name', "") == parent['uniq_id']:
549 adapter = assembly.get('sequence', {}).get('adapter', {})
550
551 json_track_data = {
552 "type": "GCContentTrack",
553 "trackId": trackData["label"],
554 "name": trackData["key"],
555 "adapter": adapter,
556 "category": [trackData["category"]],
557 "assemblyNames": [parent['uniq_id']],
558 }
559
560 style_json = self._prepare_track_style(trackData)
561
562 json_track_data.update(style_json)
563
564 self.subprocess_check_call(
565 [
566 "jbrowse",
567 "add-track-json",
568 "--target",
569 self.outdir,
570 json.dumps(json_track_data),
571 ]
572 )
573
574 def add_bigwig(self, parent, data, trackData, wiggleOpts, **kwargs):
575
576 if trackData['remote']:
577 rel_dest = data
578 else:
579 rel_dest = os.path.join("data", trackData["label"] + ".bw")
580 dest = os.path.join(self.outdir, rel_dest)
581 self.symlink_or_copy(os.path.realpath(data), dest)
582
583 style_json = self._prepare_track_style(trackData)
584
585 track_metadata = self._prepare_track_metadata(trackData)
586
587 style_json.update(track_metadata)
588
589 self._add_track(
590 trackData["label"],
591 trackData["key"],
592 trackData["category"],
593 rel_dest,
594 parent,
595 config=style_json,
596 remote=trackData['remote']
597 )
598
599 def add_bigwig_multi(self, parent, data_files, trackData, wiggleOpts, **kwargs):
600
601 subadapters = []
602
603 sub_num = 0
604 for data in data_files:
605 if trackData['remote']:
606 rel_dest = data[1]
607 else:
608 rel_dest = os.path.join("data", f"{trackData['label']}_sub{sub_num}.bw")
609 dest = os.path.join(self.outdir, rel_dest)
610 self.symlink_or_copy(os.path.realpath(data[1]), dest)
611
612 subadapters.append({
613 "type": "BigWigAdapter",
614 "name": data[0],
615 "bigWigLocation": {
616 "uri": rel_dest,
617 "locationType": "UriLocation"
618 }
619 })
620 sub_num += 1
621
622 json_track_data = {
623 "type": "MultiQuantitativeTrack",
624 "trackId": trackData["label"],
625 "name": trackData["key"],
626 "adapter": {
627 "type": "MultiWiggleAdapter",
628 "subadapters": subadapters
629 },
630 "category": [trackData["category"]],
631 "assemblyNames": [parent['uniq_id']],
632 }
633
634 style_json = self._prepare_track_style(trackData)
635
636 json_track_data.update(style_json)
637
638 track_metadata = self._prepare_track_metadata(trackData)
639
640 json_track_data.update(track_metadata)
641
642 self.subprocess_check_call(
643 [
644 "jbrowse",
645 "add-track-json",
646 "--target",
647 self.outdir,
648 json.dumps(json_track_data),
649 ]
650 )
651
652 # Anything ending in "am" (Bam or Cram)
653 def add_xam(self, parent, data, trackData, xamOpts, index=None, ext="bam", **kwargs):
654 index_ext = "bai"
655 if ext == "cram":
656 index_ext = "crai"
657
658 if trackData['remote']:
659 rel_dest = data
660 # The index URL is derived automatically by the add-track cmd (xam URL + index suffix)
661 else:
662 rel_dest = os.path.join("data", trackData["label"] + f".{ext}")
663 dest = os.path.join(self.outdir, rel_dest)
664 self.symlink_or_copy(os.path.realpath(data), dest)
665
666 if index is not None and os.path.exists(os.path.realpath(index)):
667 # The index was most probably made by galaxy and stored in galaxy dirs; copy it next to dest
668 self.subprocess_check_call(
669 ["cp", os.path.realpath(index), dest + f".{index_ext}"]
670 )
671 else:
672 # Can happen in exotic conditions,
673 # e.g. if a bam was imported as a symlink with datatype=unsorted.bam, then the datatype changed to bam
674 # => no index was generated by galaxy, but there might be one next to the symlink target.
675 # This trick allows skipping the bam sorting made by galaxy if it was already done outside.
676 if os.path.exists(os.path.realpath(data) + f".{index_ext}"):
677 self.symlink_or_copy(
678 os.path.realpath(data) + f".{index_ext}", dest + f".{index_ext}"
679 )
680 else:
681 log.warning(
682 f"Could not find a bam index (.{index_ext} file) for {data}"
683 )
684
685 style_json = self._prepare_track_style(trackData)
686
687 track_metadata = self._prepare_track_metadata(trackData)
688
689 style_json.update(track_metadata)
690
691 self._add_track(
692 trackData["label"],
693 trackData["key"],
694 trackData["category"],
695 rel_dest,
696 parent,
697 config=style_json,
698 remote=trackData['remote']
699 )
700
701 def add_vcf(self, parent, data, trackData, vcfOpts={}, zipped=False, **kwargs):
702 if trackData['remote']:
703 rel_dest = data
704 else:
705 if zipped:
706 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
707 dest = os.path.join(self.outdir, rel_dest)
708 shutil.copy(os.path.realpath(data), dest)
709 else:
710 rel_dest = os.path.join("data", trackData["label"] + ".vcf")
711 dest = os.path.join(self.outdir, rel_dest)
712 shutil.copy(os.path.realpath(data), dest)
713
714 cmd = ["bgzip", dest]
715 self.subprocess_check_call(cmd)
716 cmd = ["tabix", dest + ".gz"]
717 self.subprocess_check_call(cmd)
718
719 rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
720
721 style_json = self._prepare_track_style(trackData)
722
723 formatdetails = self._prepare_format_details(trackData)
724
725 style_json.update(formatdetails)
726
727 track_metadata = self._prepare_track_metadata(trackData)
728
729 style_json.update(track_metadata)
730
731 self._add_track(
732 trackData["label"],
733 trackData["key"],
734 trackData["category"],
735 rel_dest,
736 parent,
737 config=style_json,
738 remote=trackData['remote']
739 )
740
741 def add_gff(self, parent, data, format, trackData, gffOpts, **kwargs):
742 if trackData['remote']:
743 rel_dest = data
744 else:
745 rel_dest = os.path.join("data", trackData["label"] + ".gff")
746 dest = os.path.join(self.outdir, rel_dest)
747 rel_dest = rel_dest + ".gz"
748
749 self._sort_gff(data, dest)
750
751 style_json = self._prepare_track_style(trackData)
752
753 formatdetails = self._prepare_format_details(trackData)
754
755 style_json.update(formatdetails)
756
757 track_metadata = self._prepare_track_metadata(trackData)
758
759 style_json.update(track_metadata)
760
761 if gffOpts.get('index', 'false') in ("yes", "true", "True"):
762 if parent['uniq_id'] not in self.tracksToIndex:
763 self.tracksToIndex[parent['uniq_id']] = []
764 self.tracksToIndex[parent['uniq_id']].append(trackData["label"])
765
766 self._add_track(
767 trackData["label"],
768 trackData["key"],
769 trackData["category"],
770 rel_dest,
771 parent,
772 config=style_json,
773 remote=trackData['remote']
774 )
775
776 def add_bed(self, parent, data, format, trackData, gffOpts, **kwargs):
777 if trackData['remote']:
778 rel_dest = data
779 else:
780 rel_dest = os.path.join("data", trackData["label"] + ".bed")
781 dest = os.path.join(self.outdir, rel_dest)
782 rel_dest = rel_dest + ".gz"
783
784 self._sort_bed(data, dest)
785
786 style_json = self._prepare_track_style(trackData)
787
788 formatdetails = self._prepare_format_details(trackData)
789
790 style_json.update(formatdetails)
791
792 track_metadata = self._prepare_track_metadata(trackData)
793
794 style_json.update(track_metadata)
795
796 if gffOpts.get('index', 'false') in ("yes", "true", "True"):
797 if parent['uniq_id'] not in self.tracksToIndex:
798 self.tracksToIndex[parent['uniq_id']] = []
799 self.tracksToIndex[parent['uniq_id']].append(trackData["label"])
800
801 self._add_track(
802 trackData["label"],
803 trackData["key"],
804 trackData["category"],
805 rel_dest,
806 parent,
807 config=style_json,
808 remote=trackData['remote']
809 )
810
811 def add_paf(self, parent, data, trackData, pafOpts, **kwargs):
812
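# Local PAF files are converted below with `jbrowse make-pif` to "pif"
# (pairwise indexed PAF), a bgzipped and tabix-indexed form of the alignments
# (hence the rel_dest + ".tbi" index locations used in the adapters).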
813 if trackData['remote']:
814 rel_dest = data
815
816 if rel_dest.endswith(('pif', 'pif.gz')):
817 adapter = "pif"
818 else:
819 adapter = "paf"
820 else:
821 rel_dest = os.path.join("data", trackData["label"] + ".pif.gz")
822 dest = os.path.join(self.outdir, rel_dest)
823
824 cmd = ["jbrowse", "make-pif", "--out", dest, os.path.realpath(data)]
825 self.subprocess_check_call(cmd)
826
827 adapter = "pif"
828
829 if trackData["style"].get("display") == "LinearBasicDisplay":
830 # Normal style track
831
832 json_track_data = {
833 "type": "SyntenyTrack",
834 "trackId": trackData["label"],
835 "name": trackData["key"],
836 "adapter": {
837 "type": "PairwiseIndexedPAFAdapter",
838 "pifGzLocation": {
839 "uri": rel_dest,
840 },
841 "index": {
842 "location": {
843 "uri": rel_dest + ".tbi",
844 }
845 },
846 },
847 "category": [trackData["category"]],
848 "assemblyNames": [parent['uniq_id']],
849 }
850 else:
851 # Synteny viewer
852
853 json_track_data = {
854 "type": "SyntenyTrack",
855 "trackId": trackData["label"],
856 "name": trackData["key"],
857 "adapter": {
858 "assemblyNames": [
859 parent['uniq_id'],
860 "", # Placeholder until we know the next genome id
861 ],
862 },
863 "category": [trackData["category"]],
864 "assemblyNames": [
865 parent['uniq_id'],
866 "", # Placeholder until we know the next genome id
867 ]
868 }
869
870 if adapter == "pif":
871 json_track_data["adapter"].update({
872 "type": "PairwiseIndexedPAFAdapter",
873 "pifGzLocation": {
874 "uri": rel_dest,
875 },
876 "index": {
877 "location": {
878 "uri": rel_dest + ".tbi",
879 }
880 },
881 })
882 else:
883 json_track_data["adapter"].update({
884 "type": "PAFAdapter",
885 "pafLocation": {
886 "uri": rel_dest,
887 },
888 })
889
890 style_json = self._prepare_track_style(trackData)
891
892 json_track_data.update(style_json)
893
894 track_metadata = self._prepare_track_metadata(trackData)
895
896 json_track_data.update(track_metadata)
897
898 if trackData["style"].get("display") == "LinearBasicDisplay":
899 self.subprocess_check_call(
900 [
901 "jbrowse",
902 "add-track-json",
903 "--target",
904 self.outdir,
905 json.dumps(json_track_data),
906 ]
907 )
908 else:
909 self.synteny_tracks.append(json_track_data)
910
911 def add_hic(self, parent, data, trackData, hicOpts, **kwargs):
912 if trackData['remote']:
913 rel_dest = data
914 else:
915 rel_dest = os.path.join("data", trackData["label"] + ".hic")
916 dest = os.path.join(self.outdir, rel_dest)
917 self.symlink_or_copy(os.path.realpath(data), dest)
918
919 style_json = self._prepare_track_style(trackData)
920
921 track_metadata = self._prepare_track_metadata(trackData)
922
923 style_json.update(track_metadata)
924
925 self._add_track(
926 trackData["label"],
927 trackData["key"],
928 trackData["category"],
929 rel_dest,
930 parent,
931 config=style_json,
932 remote=trackData['remote']
933 )
934
935 def add_maf(self, parent, data, trackData, mafOpts, **kwargs):
936
937 # Add needed plugin
938 plugin_def = {
939 "name": "MafViewer",
940 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js"
941 }
942 self.plugins.append(plugin_def)
943
944 rel_dest = os.path.join("data", trackData["label"] + ".maf")
945 dest = os.path.join(self.outdir, rel_dest)
946
947 assembly_name = mafOpts.get("assembly_name", "")
948 if not assembly_name:
949 # Guess from assembly
950 assembly_name = parent['uniq_id']
951
952 self._convert_maf(data, dest, assembly_name)
953
954 # Extract samples list
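# MAF alignment block lines look like "s hg38.chr1 1000 50 + 248956422 ACG...";
# the source field is "<sample>.<chrom>", so the sample list is the unique,
# sorted set of prefixes found before the first ".".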
955 with open(data, "r") as maf_handle:
956 maf_s_lines = [x for x in maf_handle if x.startswith(("s\t", "s "))]
957 samp = [x.split()[1] for x in maf_s_lines if len(x.split()) > 1]
958 sampu = list(dict.fromkeys(samp))
959 samples = [x.split(".")[0] for x in sampu]
960 samples.sort()
961
962 json_track_data = {
963 "type": "MafTrack",
964 "trackId": trackData["label"],
965 "name": trackData["key"],
966 "adapter": {
967 "type": "MafTabixAdapter",
968 "samples": samples,
969 "bedGzLocation": {
970 "uri": rel_dest + ".gz",
971 },
972 "index": {
973 "location": {
974 "uri": rel_dest + ".gz.tbi",
975 },
976 },
977 },
978 "category": [trackData["category"]],
979 "assemblyNames": [parent['uniq_id']],
980 }
981
982 style_json = self._prepare_track_style(trackData)
983
984 json_track_data.update(style_json)
985
986 track_metadata = self._prepare_track_metadata(trackData)
987
988 json_track_data.update(track_metadata)
989
990 self.subprocess_check_call(
991 [
992 "jbrowse",
993 "add-track-json",
994 "--target",
995 self.outdir,
996 json.dumps(json_track_data),
997 ]
998 )
999
1000 def add_sparql(self, parent, url, query, query_refnames, trackData):
1001 json_track_data = {
1002 "type": "FeatureTrack",
1003 "trackId": trackData["label"],
1004 "name": trackData["key"],
1005 "adapter": {
1006 "type": "SPARQLAdapter",
1007 "endpoint": {"uri": url, "locationType": "UriLocation"},
1008 "queryTemplate": query,
1009 },
1010 "category": [trackData["category"]],
1011 "assemblyNames": [parent['uniq_id']],
1012 }
1013
1014 if query_refnames:
1015 json_track_data["adapter"]["refNamesQueryTemplate"] = query_refnames
1016
1017 # TODO handle metadata somehow for sparql too
1018
1019 self.subprocess_check_call(
1020 [
1021 "jbrowse",
1022 "add-track-json",
1023 "--target",
1024 self.outdir,
1025 json.dumps(json_track_data),
1026 ]
1027 )
1028
1029 def _add_track(self, track_id, label, category, path, assembly, config=None, trackType=None, load_action="inPlace", assemblies=None, remote=False):
1030 """
1031 Adds a track to config.json using Jbrowse add-track cli
1032
1033 By default, using `--load inPlace`: the file is supposed to be already placed at the `path` relative to
1034 the outdir, `jbrowse add-track` will not touch it and trust us that the file is there and ready to use.
1035
1036 With `load_action` parameter, you can ask `jbrowse add-track` to copy/move/symlink the file for you.
1037 This is not done by default because we often need more control over file copying/symlinking for specific cases (indexes, symlinks of symlinks, ...).
1038 """
1039
1040 cmd = [
1041 "jbrowse",
1042 "add-track",
1043 "--name",
1044 label,
1045 "--category",
1046 category,
1047 "--target",
1048 self.outdir,
1049 "--trackId",
1050 track_id,
1051 "--assemblyNames",
1052 assemblies if assemblies else assembly['uniq_id'],
1053 ]
1054
1055 if not remote:
1056 cmd.append("--load")
1057 cmd.append(load_action)
1058
1059 if config:
1060 cmd.append("--config")
1061 cmd.append(json.dumps(config))
1062
1063 if trackType:
1064 cmd.append("--trackType")
1065 cmd.append(trackType)
1066
1067 cmd.append(path)
1068
1069 self.subprocess_check_call(cmd)
1070
1071 def _sort_gff(self, data, dest):
1072 # Only index if not already done
1073 if not os.path.exists(dest):
1074 # Not using jbrowse sort-gff because it uses sort and has the problem described at https://github.com/tao-bioinfo/gff3sort
1075 cmd = f"gff3sort.pl --precise '{data}' | grep -v \"^$\" > '{dest}'"
1076 self.subprocess_popen(cmd, cwd=False)
1077
1078 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False)
1079 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"], cwd=False)
1080
1081 def _sort_bed(self, data, dest):
1082 # Only index if not already done
1083 if not os.path.exists(dest):
1084 cmd = ["sort", "-k1,1", "-k2,2n", data]
1085 with open(dest, "w") as handle:
1086 self.subprocess_check_call(cmd, output=handle)
1087
1088 self.subprocess_check_call(["bgzip", "-f", dest])
1089 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"])
1090
1091 def _convert_maf(self, data, dest, assembly_name):
1092 # Only convert if not already done
1093 if not os.path.exists(dest):
1094
1095 dest_bed = dest + ".bed"
1096 cmd = ["python", os.path.join(SELF_LOCATION, "maf2bed.py"), assembly_name, data, dest_bed]
1097 self.subprocess_check_call(cmd, cwd=False)
1098
1099 cmd = ["sort", "-k1,1", "-k2,2n", dest_bed]
1100 with open(dest, "w") as handle:
1101 self.subprocess_check_call(cmd, output=handle)
1102
1103 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False)
1104 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"], cwd=False)
1105
1106 def process_annotations(self, track, parent):
1107 category = track["category"].replace("__pd__date__pd__", TODAY)
1108
1109 track_labels = []
1110
1111 for i, (
1112 dataset_path,
1113 dataset_ext,
1114 track_human_label,
1115 extra_metadata,
1116 ) in enumerate(track["trackfiles"]):
1117 # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
1118 for key, value in mapped_chars.items():
1119 track_human_label = track_human_label.replace(value, key)
1120
1121 is_multi = isinstance(dataset_path, list)
1122
1123 log.info(
1124 f"-----> Processing track {category} / {track_human_label} ({dataset_ext}, {len(dataset_path) if is_multi else 1} files)"
1125 )
1126
1127 outputTrackConfig = {
1128 "category": category,
1129 }
1130
1131 outputTrackConfig["key"] = track_human_label
1132 # We add extra data to hash for the case of non-file tracks
1133 if (
1134 "conf" in track
1135 and "options" in track["conf"]
1136 and "url" in track["conf"]["options"]
1137 ):
1138 non_file_info = track["conf"]["options"]["url"]
1139 else:
1140 non_file_info = ""
1141
1142 # I chose to use track['category'] instead of 'category' here. This
1143 # is intentional: this way, re-running the tool on a different date
1144 # will not generate different hashes, which makes comparing outputs
1145 # much simpler.
1146 hashData = [
1147 str(dataset_path),
1148 track_human_label,
1149 track["category"],
1150 non_file_info,
1151 parent["uniq_id"],
1152 ]
1153 hashData = "|".join(hashData).encode("utf-8")
1154 outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + f"_{track['track_num']}_{i}"
1155 outputTrackConfig["metadata"] = extra_metadata
1156
1157 outputTrackConfig["style"] = track["style"]
1158
1159 outputTrackConfig["formatdetails"] = track["formatdetails"]
1160
1161 outputTrackConfig["remote"] = track["remote"]
1162
1163 # Guess extension for remote data
1164 if dataset_ext == "gff,gff3,bed":
1165 if dataset_path.endswith(".bed") or dataset_path.endswith(".bed.gz"):
1166 dataset_ext = "bed"
1167 else:
1168 dataset_ext = "gff"
1169 elif dataset_ext == "vcf,vcf_bgzip":
1170 if dataset_path.endswith(".vcf.gz"):
1171 dataset_ext = "vcf_bgzip"
1172 else:
1173 dataset_ext = "vcf"
1174
1175 if dataset_ext in ("gff", "gff3"):
1176 self.add_gff(
1177 parent,
1178 dataset_path,
1179 dataset_ext,
1180 outputTrackConfig,
1181 track["conf"]["options"]["gff"],
1182 )
1183 elif dataset_ext == "bed":
1184 self.add_bed(
1185 parent,
1186 dataset_path,
1187 dataset_ext,
1188 outputTrackConfig,
1189 track["conf"]["options"]["gff"],
1190 )
1191 elif dataset_ext == "bigwig":
1192 if is_multi:
1193 self.add_bigwig_multi(
1194 parent,
1195 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"]
1196 )
1197 else:
1198 self.add_bigwig(
1199 parent,
1200 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"]
1201 )
1202 elif dataset_ext == "maf":
1203 self.add_maf(
1204 parent,
1205 dataset_path, outputTrackConfig, track["conf"]["options"]["maf"]
1206 )
1207 elif dataset_ext == "bam":
1208
1209 if track["remote"]:
1210 bam_index = dataset_path + '.bai'
1211 else:
1212 real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
1213 "bam_index"
1214 ]
1215 if not isinstance(real_indexes, list):
1216 # <bam_indices>
1217 # <bam_index>/path/to/a.bam.bai</bam_index>
1218 # </bam_indices>
1219 #
1220 # The above will result in the 'bam_index' key containing a
1221 # string. If there are two or more indices, the container
1222 # becomes a list. Fun!
1223 real_indexes = [real_indexes]
1224
1225 bam_index = real_indexes[i]
1226
1227 self.add_xam(
1228 parent,
1229 dataset_path,
1230 outputTrackConfig,
1231 track["conf"]["options"]["pileup"],
1232 index=bam_index,
1233 ext="bam",
1234 )
1235 elif dataset_ext == "cram":
1236
1237 if track["remote"]:
1238 cram_index = dataset_path + '.crai'
1239 else:
1240 real_indexes = track["conf"]["options"]["cram"]["cram_indices"][
1241 "cram_index"
1242 ]
1243 if not isinstance(real_indexes, list):
1244 # <bam_indices>
1245 # <bam_index>/path/to/a.bam.bai</bam_index>
1246 # </bam_indices>
1247 #
1248 # The above will result in the 'bam_index' key containing a
1249 # string. If there are two or more indices, the container
1250 # becomes a list. Fun!
1251 real_indexes = [real_indexes]
1252
1253 cram_index = real_indexes[i]
1254
1255 self.add_xam(
1256 parent,
1257 dataset_path,
1258 outputTrackConfig,
1259 track["conf"]["options"]["cram"],
1260 index=cram_index,
1261 ext="cram",
1262 )
1263 elif dataset_ext == "vcf":
1264 self.add_vcf(
1265 parent,
1266 dataset_path,
1267 outputTrackConfig
1268 )
1269 elif dataset_ext == "vcf_bgzip":
1270 self.add_vcf(
1271 parent,
1272 dataset_path,
1273 outputTrackConfig,
1274 zipped=True
1275 )
1276 elif dataset_ext == "paf": # https://fr.wikipedia.org/wiki/Paf_le_chien
1277 self.add_paf(
1278 parent,
1279 dataset_path,
1280 outputTrackConfig,
1281 track["conf"]["options"]["synteny"]
1282 )
1283 elif dataset_ext in ("hic",):
1284 self.add_hic(
1285 parent,
1286 dataset_path,
1287 outputTrackConfig,
1288 track["conf"]["options"]["hic"]
1289 )
1290 elif dataset_ext == "sparql":
1291 sparql_query = track["conf"]["options"]["sparql"]["query"]
1292 for key, value in mapped_chars.items():
1293 sparql_query = sparql_query.replace(value, key)
1294 sparql_query_refnames = track["conf"]["options"]["sparql"].get("query_refnames", "")
1295 if sparql_query_refnames:
1296 for key, value in mapped_chars.items():
1297 sparql_query_refnames = sparql_query_refnames.replace(value, key)
1298 self.add_sparql(
1299 parent,
1300 track["conf"]["options"]["sparql"]["url"],
1301 sparql_query,
1302 sparql_query_refnames,
1303 outputTrackConfig,
1304 )
1305 elif dataset_ext == "gc":
1306 self.add_gc_content(
1307 parent,
1308 outputTrackConfig,
1309 )
1310 else:
1311 log.error(f"Do not know how to handle {dataset_ext}")
1312
1313 track_labels.append(outputTrackConfig["label"])
1314
1315 # Return non-human label for use in other fields
1316 return track_labels
1317
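# Builds the LinearGenomeView session spec for one genome, roughly:
# {"type": "LinearGenomeView",
#  "init": {"assembly": "<uniq_id>", "loc": "chr1:100-2000", "tracks": [...]}}
# falling back to the default scaffold recorded in self.assembly_ids when no
# (parsable) default location was given.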
1318 def add_default_view_genome(self, genome, default_loc, tracks_on):
1319
1320 refName = ""
1321 start = end = None
1322 if default_loc:
1323 loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", default_loc)
1324 if loc_match:
1325 refName = loc_match.group(1)
1326 start = int(loc_match.group(2))
1327 end = int(loc_match.group(3))
1328
1329 if not refName and self.assembly_ids[genome['uniq_id']]:
1330 refName = self.assembly_ids[genome['uniq_id']]
1331
1332 if start and end:
1333 loc_str = f"{refName}:{start}-{end}"
1334 else:
1335 loc_str = refName
1336
1337 # Updating an existing jbrowse instance, merge with pre-existing view
1338 view_specs = None
1339 if self.update:
1340 for existing in self.default_views.values():
1341 if len(existing) and existing["type"] == "LinearGenomeView":
1342 if existing['init']['assembly'] == genome['uniq_id']:
1343 view_specs = existing
1344 if loc_str:
1345 view_specs['init']['loc'] = loc_str
1346 view_specs['init']['tracks'].extend(tracks_on)
1347
1348 if view_specs is None: # Not updating, or updating from synteny
1349 view_specs = {
1350 "type": "LinearGenomeView",
1351 "init": {
1352 "assembly": genome['uniq_id'],
1353 "loc": loc_str,
1354 "tracks": tracks_on
1355 }
1356 }
1357
1358 return view_specs
1359
1360 def add_default_view_synteny(self, genome_views, synteny_tracks):
1361
1362 # Add json for cached synteny tracks
1363 # We cache them because we need to know the target genome uniq_id
1364 for strack in synteny_tracks:
1365
1366 # Target assembly is the next genome, find its uniq_id
1367 query_assembly = strack["assemblyNames"][0]
1368 ass_uniq_ids = list(self.assembly_ids.keys())
1369 query_index = ass_uniq_ids.index(query_assembly)
1370 target_assembly = ass_uniq_ids[query_index + 1]
1371
1372 strack["assemblyNames"][1] = target_assembly
1373 strack["adapter"]["assemblyNames"][1] = target_assembly
1374
1375 self.subprocess_check_call(
1376 [
1377 "jbrowse",
1378 "add-track-json",
1379 "--target",
1380 self.outdir,
1381 json.dumps(strack),
1382 ]
1383 )
1384
1385 # Configure the synteny view
1386 levels = []
1387
1388 for strack in synteny_tracks:
1389 lev = {
1390 "type": "LinearSyntenyViewHelper",
1391 "tracks": [
1392 {
1393 "type": "SyntenyTrack",
1394 "configuration": strack["trackId"],
1395 "displays": [
1396 {
1397 "type": "LinearSyntenyDisplay",
1398 "configuration": strack["trackId"] + "_LinearSyntenyDisplay"
1399 }
1400 ]
1401 }
1402 ],
1403 "height": 100,
1404 "level": len(levels)
1405 }
1406 levels.append(lev)
1407
1408 view_specs = {
1409 "type": "LinearSyntenyView",
1410 "views": genome_views,
1411 "levels": levels
1412 }
1413
1414 return view_specs
1415
1416 def add_default_session(self, default_views):
1417 """
1418 Add some default session settings: set some assemblies/tracks on/off
1419
1420 This allows selecting a default view:
1421 - jb type (Linear, Circular, etc)
1422 - default location on an assembly
1423 - default tracks
1424 - ...
1425
1426 Now using this method:
1427 https://github.com/GMOD/jbrowse-components/pull/4907
1428
1429 Different methods that were tested/discussed earlier:
1430 - using a defaultSession item in config.json before PR 4970: this proved difficult:
1431 we were forced to write a full session block, including hard-coded/hard-to-guess items,
1432 and there was no good way to let JBrowse2 display a scaffold without knowing its size
1433 - using JBrowse2 as an embedded React component in a tool-generated html file:
1434 it works, but it requires generating js code to actually do what we want (choosing the default view, assembly, tracks, ...)
1435 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below)
1436 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session
1437 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe
1438
1439 Xrefs to understand the choices:
1440 https://github.com/GMOD/jbrowse-components/issues/2708
1441 https://github.com/GMOD/jbrowse-components/discussions/3568
1442 https://github.com/GMOD/jbrowse-components/pull/4148
1443 """
1444
1445 if self.use_synteny_viewer:
1446 session_name = "Synteny"
1447 else:
1448 session_name = ', '.join(x['init']['assembly'] for x in default_views)
1449
1450 session_spec = {
1451 "name": session_name,
1452 "views": default_views
1453 }
1454
1455 config_path = os.path.join(self.outdir, "config.json")
1456 with open(config_path, "r") as config_file:
1457 config_json = json.load(config_file)
1458
1459 config_json.setdefault("defaultSession", {}).update(session_spec)
1460
1461 with open(config_path, "w") as config_file:
1462 json.dump(config_json, config_file, indent=2)
1463
1464 def add_general_configuration(self, data):
1465 """
1466 Add some general configuration to the config.json file
1467 """
1468
1469 config_path = os.path.join(self.outdir, "config.json")
1470 with open(config_path, "r") as config_file:
1471 config_json = json.load(config_file)
1472
1473 config_data = {}
1474
1475 config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
1476
1477 config_data["theme"] = {
1478 "palette": {
1479 "primary": {"main": data.get("primary_color", "#0D233F")},
1480 "secondary": {"main": data.get("secondary_color", "#721E63")},
1481 "tertiary": {"main": data.get("tertiary_color", "#135560")},
1482 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
1483 },
1484 "typography": {"fontSize": int(data.get("font_size", 10))},
1485 }
1486
1487 config_json["configuration"].update(config_data)
1488
1489 with open(config_path, "w") as config_file:
1490 json.dump(config_json, config_file, indent=2)
1491
1492 def add_plugins(self, data):
1493 """
1494 Add plugins to the config.json file
1495 """
1496
1497 config_path = os.path.join(self.outdir, "config.json")
1498 with open(config_path, "r") as config_file:
1499 config_json = json.load(config_file)
1500
1501 if "plugins" not in config_json:
1502 config_json["plugins"] = []
1503
1504 config_json["plugins"].extend(data)
1505
1506 with open(config_path, "w") as config_file:
1507 json.dump(config_json, config_file, indent=2)
1508
1509 def clone_jbrowse(self, jbrowse_dir, destination):
1510 """
1511 Clone a JBrowse directory into a destination directory.
1512
1513 Not using the `jbrowse create` command, to allow running on internet-less compute nodes and to make sure the shipped code is frozen
1514 """
1515
1516 copytree(jbrowse_dir, destination)
1517 try:
1518 shutil.rmtree(os.path.join(destination, "test_data"))
1519 except OSError as e:
1520 log.error(f"Error: {e.filename} - {e.strerror}.")
1521
1522 if not os.path.exists(os.path.join(destination, "data")):
1523 # It can already exist if upgrading an instance
1524 os.makedirs(os.path.join(destination, "data"))
1525 log.info(f"makedir {os.path.join(destination, 'data')}")
1526
1527 os.symlink("./data/config.json", os.path.join(destination, "config.json"))
1528
1529
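# Minimal stand-in for shutil.copytree(src, dst, dirs_exist_ok=True), which
# needs Python >= 3.8: copies the content of src into an already existing dst
# (sub-directories themselves must not already exist in dst).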
1530 def copytree(src, dst, symlinks=False, ignore=None):
1531 for item in os.listdir(src):
1532 s = os.path.join(src, item)
1533 d = os.path.join(dst, item)
1534 if os.path.isdir(s):
1535 shutil.copytree(s, d, symlinks, ignore)
1536 else:
1537 shutil.copy2(s, d)
1538
1539
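# Converts an XML option element into a typed Python value based on its "type"
# attribute, e.g. <show_labels type="boolean">true</show_labels> -> True and
# <max_height type="integer">600</max_height> -> 600; anything else stays text.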
1540 def parse_style_conf(item):
1541 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
1542 if item.attrib["type"] == "boolean":
1543 return item.text in ("yes", "true", "True")
1544 elif item.attrib["type"] == "integer":
1545 return int(item.text)
1546 else:
1547 return item.text
1548
1549
1550 def validate_synteny(real_root):
1551
1552 if len(real_root.findall('assembly/tracks/track[@format="synteny"]')) == 0:
1553 # No synteny data, all good
1554 return False
1555
1556 assemblies = real_root.findall("assembly")
1557
1558 last_display = assemblies[-1].find('tracks/track[@format="synteny"]/options/style/display')
1559 if last_display is not None and last_display.text == "LinearSyntenyDisplay":
1560 raise RuntimeError("You should not set a synteny track on the last genome.")
1561
1562 for assembly in assemblies[:-1]:
1563 display = assembly.find('tracks/track[@format="synteny"]/options/style/display')
1564 if len(assembly.findall('tracks/track[@format="synteny"]')) != 1 and display is not None and display.text == "LinearSyntenyDisplay":
1565 raise RuntimeError("To use the synteny viewer, you should add a synteny track to each assembly, except the last one.")
1566
1567 return True
1568
1569
1570 if __name__ == "__main__":
1571 parser = argparse.ArgumentParser(description="", epilog="")
1572 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")
1573
1574 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
1575 parser.add_argument("--update", help="Update an existing JBrowse2 instance", action="store_true")
1576 parser.add_argument("--outdir", help="Output directory", default="out")
1577 args = parser.parse_args()
1578
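# Rough shape of the input XML, as inferred from the lookups below (not a schema):
# <root>
#   <metadata><galaxyUrl>...</galaxyUrl><general>...</general></metadata>
#   <assembly>
#     <genome path="..." label="..." remote="false"><metadata>...</metadata></genome>
#     <tracks><track cat="..." format="..." visibility="default_on">...</track></tracks>
#     <defaultLocation>chr1:100..2000</defaultLocation>
#   </assembly>
# </root>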
1579 tree = ET.parse(args.xml.name)
1580 real_root = tree.getroot()
1581
1582 # This should be done ASAP
1583 # Sometimes this comes as `localhost` without a protocol
1584 GALAXY_INFRASTRUCTURE_URL = real_root.find("metadata/galaxyUrl").text
1585 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
1586 # so we'll prepend `http://` and hope for the best. Requests *should*
1587 # be GET and not POST so it should redirect OK
1588 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
1589
1590 jc = JbrowseConnector(
1591 jbrowse=args.jbrowse,
1592 outdir=args.outdir,
1593 update=args.update,
1594 )
1595
1596 # Synteny options are special, check them first
1597 jc.use_synteny_viewer = validate_synteny(real_root)
1598
1599 for assembly in real_root.findall("assembly"):
1600 genome_el = assembly.find('genome')
1601
1602 is_remote = genome_el.attrib.get("remote", "false") == "true"
1603
1604 genome = {
1605 "path": genome_el.attrib["path"] if is_remote else os.path.realpath(genome_el.attrib["path"]),
1606 "meta": metadata_from_node(genome_el.find("metadata")),
1607 "label": genome_el.attrib["label"],
1608 }
1609
1610 cytobands = None
1611 cytobands_el = genome_el.find("cytobands")
1612 if cytobands_el is not None and "path" in cytobands_el.attrib:
1613 cytobands = cytobands_el.attrib["path"]
1614
1615 ref_name_aliases = None
1616 ref_name_aliases_el = genome_el.find("ref_name_aliases")
1617 if ref_name_aliases_el is not None and "path" in ref_name_aliases_el.attrib:
1618 ref_name_aliases = ref_name_aliases_el.attrib["path"]
1619
1620 log.debug("Processing genome %s", genome)
1621 genome["uniq_id"] = jc.add_assembly(genome["path"], genome["label"], is_remote, cytobands, ref_name_aliases)
1622
1623 default_tracks_on = []
1624
1625 track_num = 0
1626 for track in assembly.findall("tracks/track"):
1627 track_conf = {}
1628 track_conf["trackfiles"] = []
1629 track_conf["track_num"] = track_num
1630
1631 trackfiles = track.findall("files/trackFile") or []
1632
1633 is_multi = False
1634 multi_paths = []
1635 multi_type = None
1636 multi_metadata = {}
1637 try:
1638 multi_in_xml = track.find("options/multitrack")
1639 if multi_in_xml is not None and parse_style_conf(multi_in_xml):
1640 is_multi = True
1641 multi_paths = []
1642 multi_type = trackfiles[0].attrib["ext"]
1643 except (KeyError, IndexError):
1644 pass
1645
1646 is_remote = False
1647 if trackfiles:
1648 for x in trackfiles:
1649 if is_multi:
1650 is_remote = x.attrib.get("remote", "false") == "true"
1651 multi_paths.append(
1652 (x.attrib["label"], x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]))
1653 )
1654 multi_metadata.update(metadata_from_node(x.find("metadata")))
1655 else:
1656 metadata = metadata_from_node(x.find("metadata"))
1657 is_remote = x.attrib.get("remote", "false") == "true"
1658 track_conf["trackfiles"].append(
1659 (
1660 x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]),
1661 x.attrib["ext"],
1662 x.attrib["label"],
1663 metadata,
1664 )
1665 )
1666 else:
1667 # For tracks without files (sparql, gc)
1668 track_conf["trackfiles"].append(
1669 (
1670 "", # N/A, no path for sparql or gc
1671 track.attrib["format"],
1672 track.find("options/label").text,
1673 {},
1674 )
1675 )
1676
1677 if is_multi:
1678 etal_tracks_nb = len(multi_paths[1:])
1679 multi_label = f"{multi_paths[0][0]} + {etal_tracks_nb} other track{'s' if etal_tracks_nb > 1 else ''}"
1680
1681 track_conf["trackfiles"].append(
1682 (
1683 multi_paths, # Passing an array of paths to represent as one track
1684 multi_type, # First file type
1685 multi_label, # First file label
1686 multi_metadata, # Mix of all metadata for multiple bigwigs => only the last file's metadata coming from galaxy + custom ones
1687 )
1688 )
1689 track_conf["category"] = track.attrib["cat"]
1690 track_conf["format"] = track.attrib["format"]
1691 track_conf["style"] = {
1692 item.tag: parse_style_conf(item) for item in (track.find("options/style") or [])
1693 }
1694
1699 track_conf["style_labels"] = {
1700 item.tag: parse_style_conf(item)
1701 for item in (track.find("options/style_labels") or [])
1702 }
1703 track_conf["formatdetails"] = {
1704 item.tag: parse_style_conf(item) for item in (track.find("options/formatdetails") or [])
1705 }
1706
1707 track_conf["conf"] = etree_to_dict(track.find("options"))
1708
1709 track_conf["remote"] = is_remote
1710
1711 track_labels = jc.process_annotations(track_conf, genome)
1712
1713 if track.attrib["visibility"] == "default_on":
1714 for tlabel in track_labels:
1715 default_tracks_on.append(tlabel)
1716
1717 track_num += 1
1718
1719 default_loc = assembly.find("defaultLocation").text
1720
1721 jc.default_views[genome['uniq_id']] = jc.add_default_view_genome(genome, default_loc, default_tracks_on)
1722
1723 if jc.use_synteny_viewer:
1724 synteny_view = jc.add_default_view_synteny(list(jc.default_views.values()), jc.synteny_tracks)
1725
1726 views_for_session = jc._load_old_synteny_views()
1727
1728 views_for_session.append(synteny_view)
1729 else:
1730 old_views = jc._load_old_genome_views()
1731
1732 for old_view in old_views:
1733 if old_view not in jc.default_views:
1734 jc.default_views[old_view] = old_views[old_view]
1735
1736 views_for_session = list(jc.default_views.values())
1737
1738 general_data = {
1739 "analytics": real_root.find("metadata/general/analytics").text,
1740 "primary_color": real_root.find("metadata/general/primary_color").text,
1741 "secondary_color": real_root.find("metadata/general/secondary_color").text,
1742 "tertiary_color": real_root.find("metadata/general/tertiary_color").text,
1743 "quaternary_color": real_root.find("metadata/general/quaternary_color").text,
1744 "font_size": real_root.find("metadata/general/font_size").text,
1745 }
1746
1747 jc.add_default_session(views_for_session)
1748 jc.add_general_configuration(general_data)
1749 jc.add_plugins(jc.plugins)
1750 jc.text_index()