Mercurial > repos > iuc > jbrowse2
comparison jbrowse2.py @ 0:61add3f58f26 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 4fa86613193c985e0cb9a8fc795c56b8bc7b8532
| author | iuc |
|---|---|
| date | Thu, 02 Oct 2025 10:19:44 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:61add3f58f26 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import csv | |
| 4 import datetime | |
| 5 import hashlib | |
| 6 import json | |
| 7 import logging | |
| 8 import os | |
| 9 import re | |
| 10 import shutil | |
| 11 import subprocess | |
| 12 import xml.etree.ElementTree as ET | |
| 13 from collections import defaultdict | |
| 14 | |
| 15 import requests | |
| 16 | |
| 17 | |
# Verbose logging; Galaxy captures the tool's stdout/stderr for debugging.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# Date stamp written into generated track metadata.
TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
# Directory containing this script (bundled assets live next to it).
SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
# Base URL of the Galaxy server; set at runtime from the tool inputs.
GALAXY_INFRASTRUCTURE_URL = None

# Reverse mapping of Galaxy's parameter sanitization: each escaped token
# (value) is replaced by the real character (key) when reading user input
# (see _prepare_format_details, which calls str.replace(value, key)).
mapped_chars = {
    ">": "__gt__",
    "<": "__lt__",
    "'": "__sq__",
    '"': "__dq__",
    "[": "__ob__",
    "]": "__cb__",
    "{": "__oc__",
    "}": "__cc__",
    "@": "__at__",
    "#": "__pd__",
    # Bug fix: Galaxy escapes a newline as __cn__; the key was previously the
    # empty string, which silently *deleted* __cn__ tokens instead of
    # restoring the newline character.
    "\n": "__cn__",
}
| 36 | |
| 37 | |
def etree_to_dict(t):
    """Recursively convert an ElementTree element into a plain dict.

    Attributes are stored under "@"-prefixed keys, stripped text content
    under "#text" (or as the value itself for leaf nodes), and repeated
    child tags are collected into lists. Returns {} for a None element.
    """
    if t is None:
        return {}

    node = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group converted children by tag so repeats become lists.
        grouped = defaultdict(list)
        for child_dict in map(etree_to_dict, children):
            for key, val in child_dict.items():
                grouped[key].append(val)
        node = {
            t.tag: {
                key: vals[0] if len(vals) == 1 else vals
                for key, vals in grouped.items()
            }
        }
    if t.attrib:
        node[t.tag].update(("@" + key, val) for key, val in t.attrib.items())
    if t.text:
        stripped = t.text.strip()
        if children or t.attrib:
            if stripped:
                node[t.tag]["#text"] = stripped
        else:
            # Leaf node with only text: collapse to the bare string.
            node[t.tag] = stripped
    return node
| 60 | |
| 61 | |
| 62 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) | |
| 63 | |
| 64 | |
def metadata_from_node(node):
    """Extract Galaxy metadata from a track's XML node.

    Returns a flat dict with dataset_*, history_*, metadata_* and tool_*
    keys (one per attribute of the corresponding child element), some of
    which are rewritten as HTML links to the Galaxy server. Extra
    key/value pairs can be injected from a user-supplied TSV file
    referenced by an optional <bonus src="..."> child.
    """
    metadata = {}

    # Only populated when the node describes exactly one dataset.
    if len(node.findall("dataset")) == 1:

        for key, value in node.findall("dataset")[0].attrib.items():
            metadata[f"dataset_{key}"] = value

        for key, value in node.findall("history")[0].attrib.items():
            metadata[f"history_{key}"] = value

        for key, value in node.findall("metadata")[0].attrib.items():
            metadata[f"metadata_{key}"] = value

        for key, value in node.findall("tool")[0].attrib.items():
            metadata[f"tool_{key}"] = value

        # Additional Mappings applied:
        # Turn the EDAM format id into a link to the ontology entry.
        metadata[
            "dataset_edam_format"
        ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
        )
        metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
            metadata["history_user_email"]
        )
        # Link to the originating Galaxy history.
        metadata[
            "history_display_name"
        ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_hist_id=metadata["history_id"],
            hist_name=metadata["history_display_name"],
        )
        # Link to the tool run parameters page for the dataset.
        metadata[
            "tool_tool"
        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_id=metadata["dataset_id"],
            tool_id=metadata["tool_tool_id"],
            # tool_version=metadata['tool_tool_version'],
        )

    # Load additional metadata from a TSV file if any given by user
    bonus = node.findall("bonus")
    if bonus and "src" in bonus[0].attrib and bonus[0].attrib["src"]:
        with open(bonus[0].attrib["src"], "r") as bonus_tsv:
            bonus_content = csv.reader(bonus_tsv, delimiter="\t", quotechar='"')
            for row in bonus_content:
                # Expect exactly two columns: key <tab> value.
                if len(row) == 2:
                    if row[0] in metadata:
                        log.warning(f"Overwriting existing metadata {row[0]} with value from bonus file {row[1]}")
                    metadata[row[0]] = row[1]
                else:
                    log.warning(f"Skipping invalid bonus metadata line: {row}")

    return metadata
| 121 | |
| 122 | |
| 123 class JbrowseConnector(object): | |
    def __init__(self, jbrowse, outdir, update):
        """Set up a connector writing a JBrowse2 instance into *outdir*.

        jbrowse: path of the JBrowse2 distribution to clone.
        outdir: output directory of the generated instance.
        update: truthy when upgrading an existing instance in place.
        """
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.update = update

        # Per-assembly lists of track ids to feed `jbrowse text-index`.
        self.tracksToIndex = {}

        # This is the id of the current assembly
        self.assembly_ids = {}

        # Default session views, keyed by assembly name.
        self.default_views = {}

        # Extra JBrowse plugins to declare in the generated config.
        self.plugins = []

        self.use_synteny_viewer = False

        # Track configs queued for the synteny viewer.
        self.synteny_tracks = []

        self.clone_jbrowse(self.jbrowse, self.outdir)

        # If upgrading, look at the existing data
        self.check_existing(self.outdir)
| 146 | |
| 147 def get_cwd(self, cwd): | |
| 148 if cwd: | |
| 149 return self.outdir | |
| 150 else: | |
| 151 return subprocess.check_output(['pwd']).decode('utf-8').strip() | |
| 152 # return None | |
| 153 | |
| 154 def subprocess_check_call(self, command, output=None, cwd=True): | |
| 155 if output: | |
| 156 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)} > {output.name}") | |
| 157 subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output) | |
| 158 else: | |
| 159 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}") | |
| 160 subprocess.check_call(command, cwd=self.get_cwd(cwd)) | |
| 161 | |
| 162 def subprocess_popen(self, command, cwd=True): | |
| 163 log.debug(f"cd {self.get_cwd(cwd)} && {command}") | |
| 164 p = subprocess.Popen( | |
| 165 command, | |
| 166 cwd=self.get_cwd(cwd), | |
| 167 shell=True, | |
| 168 stdin=subprocess.PIPE, | |
| 169 stdout=subprocess.PIPE, | |
| 170 stderr=subprocess.PIPE, | |
| 171 ) | |
| 172 output, err = p.communicate() | |
| 173 retcode = p.returncode | |
| 174 if retcode != 0: | |
| 175 log.error(f"cd {self.get_cwd(cwd)} && {command}") | |
| 176 log.error(output) | |
| 177 log.error(err) | |
| 178 raise RuntimeError(f"Command failed with exit code {retcode}") | |
| 179 | |
| 180 def subprocess_check_output(self, command, cwd=True): | |
| 181 log.debug(f"cd {self.get_cwd(cwd)} && {' '.join(command)}") | |
| 182 return subprocess.check_output(command, cwd=self.get_cwd(cwd)) | |
| 183 | |
| 184 def symlink_or_copy(self, src, dest): | |
| 185 # Use to support symlinking in jbrowse1, in jbrowse2 prefer to use remote uri | |
| 186 cmd = ["cp", src, dest] | |
| 187 | |
| 188 return self.subprocess_check_call(cmd) | |
| 189 | |
| 190 def _prepare_track_style(self, xml_conf): | |
| 191 style_data = { | |
| 192 "type": "LinearBasicDisplay", # No ideal default, but should be overwritten anyway | |
| 193 } | |
| 194 | |
| 195 if "display" in xml_conf["style"]: | |
| 196 style_data["type"] = xml_conf["style"]["display"] | |
| 197 | |
| 198 style_data["displayId"] = f"{xml_conf['label']}_{style_data['type']}" | |
| 199 | |
| 200 style_data.update(self._prepare_renderer_config(style_data["type"], xml_conf["style"])) | |
| 201 | |
| 202 return {"displays": [style_data]} | |
| 203 | |
| 204 def _prepare_renderer_config(self, display_type, xml_conf): | |
| 205 | |
| 206 style_data = {} | |
| 207 | |
| 208 # if display_type in ("LinearBasicDisplay", "LinearVariantDisplay"): | |
| 209 # TODO LinearVariantDisplay does not understand these options when written in config.json | |
| 210 if display_type in ("LinearBasicDisplay"): | |
| 211 | |
| 212 # Doc: https://jbrowse.org/jb2/docs/config/svgfeaturerenderer/ | |
| 213 style_data["renderer"] = { | |
| 214 "type": "SvgFeatureRenderer", | |
| 215 "showLabels": xml_conf.get("show_labels", True), | |
| 216 "showDescriptions": xml_conf.get("show_descriptions", True), | |
| 217 "labels": { | |
| 218 "name": xml_conf.get("labels_name", "jexl:get(feature,'name') || get(feature,'id')"), | |
| 219 "description": xml_conf.get("descriptions_name", "jexl:get(feature,'note') || get(feature,'description')") | |
| 220 }, | |
| 221 "displayMode": xml_conf.get("display_mode", "normal"), | |
| 222 "maxHeight": xml_conf.get("max_height", 1200), | |
| 223 } | |
| 224 | |
| 225 elif display_type == "LinearArcDisplay": | |
| 226 | |
| 227 # Doc: https://jbrowse.org/jb2/docs/config/arcrenderer/ | |
| 228 style_data["renderer"] = { | |
| 229 "type": "ArcRenderer", | |
| 230 "label": xml_conf.get("labels_name", "jexl:get(feature,'score')"), | |
| 231 "displayMode": xml_conf.get("display_mode", "arcs"), | |
| 232 } | |
| 233 | |
| 234 elif display_type == "LinearWiggleDisplay": | |
| 235 | |
| 236 wig_renderer = xml_conf.get("renderer", "xyplot") | |
| 237 style_data["defaultRendering"] = wig_renderer | |
| 238 | |
| 239 elif display_type == "MultiLinearWiggleDisplay": | |
| 240 | |
| 241 wig_renderer = xml_conf.get("renderer", "multirowxy") | |
| 242 style_data["defaultRendering"] = wig_renderer | |
| 243 | |
| 244 elif display_type == "LinearSNPCoverageDisplay": | |
| 245 | |
| 246 # Does not work | |
| 247 # style_data["renderer"] = { | |
| 248 # "type": "SNPCoverageRenderer", | |
| 249 # "displayCrossHatches": xml_conf.get("display_cross_hatches", True), | |
| 250 # } | |
| 251 | |
| 252 style_data["scaleType"] = xml_conf.get("scale_type", "linear") | |
| 253 if "min_score" in xml_conf: | |
| 254 style_data["minScore"] = xml_conf["min_score"] | |
| 255 | |
| 256 if "max_score" in xml_conf: | |
| 257 style_data["maxScore"] = xml_conf["max_score"] | |
| 258 | |
| 259 # Doc: https://jbrowse.org/jb2/docs/config/snpcoveragerenderer/ | |
| 260 | |
| 261 return style_data | |
| 262 | |
| 263 def _prepare_format_details(self, xml_conf): | |
| 264 formatDetails = { | |
| 265 } | |
| 266 | |
| 267 if "feature" in xml_conf["formatdetails"]: | |
| 268 feat_jexl = xml_conf["formatdetails"]["feature"] | |
| 269 for key, value in mapped_chars.items(): | |
| 270 feat_jexl = feat_jexl.replace(value, key) | |
| 271 formatDetails["feature"] = feat_jexl | |
| 272 | |
| 273 if "subfeature" in xml_conf["formatdetails"]: | |
| 274 sfeat_jexl = xml_conf["formatdetails"]["subfeature"] | |
| 275 for key, value in mapped_chars.items(): | |
| 276 sfeat_jexl = sfeat_jexl.replace(value, key) | |
| 277 formatDetails["subfeatures"] = sfeat_jexl | |
| 278 | |
| 279 if "depth" in xml_conf["formatdetails"]: | |
| 280 formatDetails["depth"] = int(xml_conf["formatdetails"]["depth"]) | |
| 281 | |
| 282 return {"formatDetails": formatDetails} | |
| 283 | |
| 284 def _prepare_track_metadata(self, xml_conf): | |
| 285 metadata = { | |
| 286 } | |
| 287 | |
| 288 metadata = xml_conf["metadata"] | |
| 289 | |
| 290 return {"metadata": metadata} | |
| 291 | |
    def check_existing(self, destination):
        """Load assemblies from an existing config.json (update mode).

        For each assembly found, records its name in self.assembly_ids,
        mapped to the default scaffold shown by the default session views
        (or None when no view provides a location).
        """
        existing = os.path.join(destination, "config.json")
        if os.path.exists(existing):
            with open(existing, "r") as existing_conf:
                conf = json.load(existing_conf)
                if "assemblies" in conf:
                    for assembly in conf["assemblies"]:
                        if "name" in assembly:

                            # Look for a default scaffold
                            default_seq = None
                            if 'defaultSession' in conf and 'views' in conf['defaultSession']:
                                for view in conf['defaultSession']['views']:
                                    if 'init' in view and 'assembly' in view['init'] and 'loc' in view['init']:
                                        if view['init']['assembly'] == assembly["name"]:
                                            # loc looks like "scaffold:start-end"; keep the scaffold
                                            default_seq = view['init']['loc'].split(":")[0]
                                    # Synteny views nest their genome views in a "views" list
                                    if "views" in view:
                                        subviews = view["views"]
                                        for subview in subviews:
                                            if 'init' in subview and 'assembly' in subview['init'] and 'loc' in subview['init']:
                                                if subview['init']['assembly'] == assembly["name"]:
                                                    default_seq = subview['init']['loc'].split(":")[0]

                            self.assembly_ids[assembly["name"]] = default_seq
| 316 | |
| 317 def _load_old_genome_views(self): | |
| 318 | |
| 319 views = {} | |
| 320 | |
| 321 config_path = os.path.join(self.outdir, "config.json") | |
| 322 with open(config_path, "r") as config_file: | |
| 323 config_json = json.load(config_file) | |
| 324 | |
| 325 # Find default synteny views existing from a previous jbrowse dataset | |
| 326 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']: | |
| 327 for view in config_json['defaultSession']['views']: | |
| 328 if view['type'] != "LinearSyntenyView": | |
| 329 if 'init' in view and 'assembly' in view['init']: | |
| 330 views[view['init']['assembly']] = view | |
| 331 | |
| 332 return views | |
| 333 | |
| 334 def _load_old_synteny_views(self): | |
| 335 | |
| 336 views = [] | |
| 337 | |
| 338 config_path = os.path.join(self.outdir, "config.json") | |
| 339 with open(config_path, "r") as config_file: | |
| 340 config_json = json.load(config_file) | |
| 341 | |
| 342 # Find default synteny views existing from a previous jbrowse dataset | |
| 343 if 'defaultSession' in config_json and 'views' in config_json['defaultSession']: | |
| 344 for view in config_json['defaultSession']['views']: | |
| 345 if view['type'] == "LinearSyntenyView": | |
| 346 views.append(view) | |
| 347 | |
| 348 return views | |
| 349 | |
    def add_assembly(self, path, label, is_remote=False, cytobands=None, ref_name_aliases=None):
        """Add a genome assembly to the JBrowse2 instance.

        path: local fasta path, or url of a bgzipped fasta when is_remote.
        label: desired assembly name (deduplicated if already used).
        cytobands: optional cytoband file to attach to the assembly.
        ref_name_aliases: optional refNameAliases file.
        Returns the (possibly deduplicated) assembly name actually used.
        """
        if not is_remote:
            # Find a non-existing filename for the new genome
            # (to avoid colision when upgrading an existing instance)
            rel_seq_path = os.path.join("data", label)
            seq_path = os.path.join(self.outdir, rel_seq_path)
            fn_try = 1
            while (
                os.path.exists(seq_path + ".fasta")
                or os.path.exists(seq_path + ".fasta.gz")
                or os.path.exists(seq_path + ".fasta.gz.fai")
                or os.path.exists(seq_path + ".fasta.gz.gzi")
            ):
                rel_seq_path = os.path.join("data", f"{label}{fn_try}")
                seq_path = os.path.join(self.outdir, rel_seq_path)
                fn_try += 1

        # Check if the assembly already exists from a previous run (--update mode)
        if self.update:

            config_path = os.path.join(self.outdir, "config.json")
            with open(config_path, "r") as config_file:
                config_json = json.load(config_file)

            for asby in config_json['assemblies']:
                if asby['name'] == label:

                    # Find default views existing for this assembly
                    if 'defaultSession' in config_json and 'views' in config_json['defaultSession']:
                        for view in config_json['defaultSession']['views']:
                            if 'init' in view and 'assembly' in view['init']:
                                if view['init']['assembly'] == label:

                                    log.info("Found existing assembly from existing JBrowse2 instance, preserving it")

                                    self.default_views[view['init']['assembly']] = view

                    # Reuse the existing assembly as-is.
                    return label

        # Copy ref alias file if any
        # NOTE(review): seq_path/rel_seq_path are only defined in the
        # non-remote branch above; a remote assembly combined with
        # ref_name_aliases or cytobands would raise NameError here — confirm.
        if ref_name_aliases:
            copied_ref_name_aliases = seq_path + ".aliases"
            shutil.copy(ref_name_aliases, copied_ref_name_aliases)
            # Keep the path relative for use in the generated config.
            copied_ref_name_aliases = rel_seq_path + ".aliases"

        # Copy cytobands file if any
        if cytobands:
            copied_cytobands = seq_path + ".cytobands"
            shutil.copy(cytobands, copied_cytobands)
            copied_cytobands = rel_seq_path + ".cytobands"

        # Find a non-existing label for the new genome
        # (to avoid colision when upgrading an existing instance)
        lab_try = 1
        uniq_label = label
        while uniq_label in self.assembly_ids:
            uniq_label = label + str(lab_try)
            lab_try += 1

        if is_remote:

            # Find a default scaffold to display: first sequence of the .fai
            with requests.get(path + ".fai", stream=True) as response:
                response.raise_for_status()
                first_seq = next(response.iter_lines())
                first_seq = first_seq.decode("utf-8").split('\t')[0]

            self.assembly_ids[uniq_label] = first_seq

            # We assume we just need to suffix url with .fai and .gzi for indexes.
            cmd_jb = [
                "jbrowse",
                "add-assembly",
                "--name",
                uniq_label,
                "--type",
                "bgzipFasta",
                "--out",
                self.outdir,
                "--skipCheck",
            ]

            if ref_name_aliases:
                cmd_jb.extend([
                    "--refNameAliases",
                    copied_ref_name_aliases,
                ])

            cmd_jb.append(path)  # Path is an url in remote mode

            self.subprocess_check_call(cmd_jb)
        else:
            # Find a default scaffold to display: first fasta header
            with open(path, "r") as fa_handle:
                fa_header = fa_handle.readline()[1:].strip().split(" ")[0]

            self.assembly_ids[uniq_label] = fa_header

            copied_genome = seq_path + ".fasta"
            shutil.copy(path, copied_genome)

            # Compress with bgzip
            cmd = ["bgzip", copied_genome]
            self.subprocess_check_call(cmd)

            # FAI Index
            cmd = ["samtools", "faidx", copied_genome + ".gz"]
            self.subprocess_check_call(cmd)

            cmd_jb = [
                "jbrowse",
                "add-assembly",
                "--load",
                "inPlace",
                "--name",
                uniq_label,
                "--type",
                "bgzipFasta",
                "--out",
                self.outdir,
                "--skipCheck",
            ]

            if ref_name_aliases:
                cmd_jb.extend([
                    "--refNameAliases",
                    copied_ref_name_aliases,
                ])

            cmd_jb.append(rel_seq_path + ".fasta.gz")

            self.subprocess_check_call(cmd_jb)

        if cytobands:
            self.add_cytobands(uniq_label, copied_cytobands)

        return uniq_label
| 488 | |
| 489 def add_cytobands(self, assembly_name, cytobands_path): | |
| 490 | |
| 491 config_path = os.path.join(self.outdir, "config.json") | |
| 492 with open(config_path, "r") as config_file: | |
| 493 config_json = json.load(config_file) | |
| 494 | |
| 495 config_data = {} | |
| 496 | |
| 497 config_data["cytobands"] = { | |
| 498 "adapter": { | |
| 499 "type": "CytobandAdapter", | |
| 500 "cytobandLocation": { | |
| 501 "uri": cytobands_path | |
| 502 } | |
| 503 } | |
| 504 } | |
| 505 | |
| 506 filled_assemblies = [] | |
| 507 for assembly in config_json["assemblies"]: | |
| 508 if assembly["name"] == assembly_name: | |
| 509 assembly.update(config_data) | |
| 510 filled_assemblies.append(assembly) | |
| 511 config_json["assemblies"] = filled_assemblies | |
| 512 | |
| 513 with open(config_path, "w") as config_file: | |
| 514 json.dump(config_json, config_file, indent=2) | |
| 515 | |
| 516 def text_index(self): | |
| 517 | |
| 518 for ass in self.tracksToIndex: | |
| 519 tracks = self.tracksToIndex[ass] | |
| 520 args = [ | |
| 521 "jbrowse", | |
| 522 "text-index", | |
| 523 "--target", | |
| 524 self.outdir, | |
| 525 "--assemblies", | |
| 526 ass, | |
| 527 ] | |
| 528 | |
| 529 tracks = ",".join(tracks) | |
| 530 if tracks: | |
| 531 args += ["--tracks", tracks] | |
| 532 | |
| 533 log.info(f"-----> Running text-index on assembly {ass} and tracks {tracks}") | |
| 534 | |
| 535 # Only run index if we want to index at least one | |
| 536 # If --tracks is not specified, it will index everything | |
| 537 self.subprocess_check_call(args) | |
| 538 | |
    def add_gc_content(self, parent, trackData, **kwargs):
        """Add a GCContentTrack computed from the assembly's own sequence.

        Reuses the sequence adapter of the parent assembly (read back from
        the generated config.json), so no extra data file is needed.
        """
        adapter = {}
        existing = os.path.join(self.outdir, "config.json")
        if os.path.exists(existing):
            with open(existing, "r") as existing_conf:
                conf = json.load(existing_conf)
                if "assemblies" in conf:
                    for assembly in conf["assemblies"]:
                        if assembly.get('name', "") == parent['uniq_id']:
                            adapter = assembly.get('sequence', {}).get('adapter', {})

        json_track_data = {
            "type": "GCContentTrack",
            "trackId": trackData["label"],
            "name": trackData["key"],
            "adapter": adapter,
            "category": [trackData["category"]],
            "assemblyNames": [parent['uniq_id']],
        }

        style_json = self._prepare_track_style(trackData)

        json_track_data.update(style_json)

        # Register the prepared track config with the jbrowse CLI.
        self.subprocess_check_call(
            [
                "jbrowse",
                "add-track-json",
                "--target",
                self.outdir,
                json.dumps(json_track_data),
            ]
        )
| 573 | |
| 574 def add_bigwig(self, parent, data, trackData, wiggleOpts, **kwargs): | |
| 575 | |
| 576 if trackData['remote']: | |
| 577 rel_dest = data | |
| 578 else: | |
| 579 rel_dest = os.path.join("data", trackData["label"] + ".bw") | |
| 580 dest = os.path.join(self.outdir, rel_dest) | |
| 581 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 582 | |
| 583 style_json = self._prepare_track_style(trackData) | |
| 584 | |
| 585 track_metadata = self._prepare_track_metadata(trackData) | |
| 586 | |
| 587 style_json.update(track_metadata) | |
| 588 | |
| 589 self._add_track( | |
| 590 trackData["label"], | |
| 591 trackData["key"], | |
| 592 trackData["category"], | |
| 593 rel_dest, | |
| 594 parent, | |
| 595 config=style_json, | |
| 596 remote=trackData['remote'] | |
| 597 ) | |
| 598 | |
| 599 def add_bigwig_multi(self, parent, data_files, trackData, wiggleOpts, **kwargs): | |
| 600 | |
| 601 subadapters = [] | |
| 602 | |
| 603 sub_num = 0 | |
| 604 for data in data_files: | |
| 605 if trackData['remote']: | |
| 606 rel_dest = data[1] | |
| 607 else: | |
| 608 rel_dest = os.path.join("data", f"{trackData['label']}_sub{sub_num}.bw") | |
| 609 dest = os.path.join(self.outdir, rel_dest) | |
| 610 self.symlink_or_copy(os.path.realpath(data[1]), dest) | |
| 611 | |
| 612 subadapters.append({ | |
| 613 "type": "BigWigAdapter", | |
| 614 "name": data[0], | |
| 615 "bigWigLocation": { | |
| 616 "uri": rel_dest, | |
| 617 "locationType": "UriLocation" | |
| 618 } | |
| 619 }) | |
| 620 sub_num += 1 | |
| 621 | |
| 622 json_track_data = { | |
| 623 "type": "MultiQuantitativeTrack", | |
| 624 "trackId": trackData["label"], | |
| 625 "name": trackData["key"], | |
| 626 "adapter": { | |
| 627 "type": "MultiWiggleAdapter", | |
| 628 "subadapters": subadapters | |
| 629 }, | |
| 630 "category": [trackData["category"]], | |
| 631 "assemblyNames": [parent['uniq_id']], | |
| 632 } | |
| 633 | |
| 634 style_json = self._prepare_track_style(trackData) | |
| 635 | |
| 636 json_track_data.update(style_json) | |
| 637 | |
| 638 track_metadata = self._prepare_track_metadata(trackData) | |
| 639 | |
| 640 json_track_data.update(track_metadata) | |
| 641 | |
| 642 self.subprocess_check_call( | |
| 643 [ | |
| 644 "jbrowse", | |
| 645 "add-track-json", | |
| 646 "--target", | |
| 647 self.outdir, | |
| 648 json.dumps(json_track_data), | |
| 649 ] | |
| 650 ) | |
| 651 | |
| 652 # Anything ending in "am" (Bam or Cram) | |
| 653 def add_xam(self, parent, data, trackData, xamOpts, index=None, ext="bam", **kwargs): | |
| 654 index_ext = "bai" | |
| 655 if ext == "cram": | |
| 656 index_ext = "crai" | |
| 657 | |
| 658 if trackData['remote']: | |
| 659 rel_dest = data | |
| 660 # Index will be set automatically as xam url + xai .suffix by add-track cmd | |
| 661 else: | |
| 662 rel_dest = os.path.join("data", trackData["label"] + f".{ext}") | |
| 663 dest = os.path.join(self.outdir, rel_dest) | |
| 664 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 665 | |
| 666 if index is not None and os.path.exists(os.path.realpath(index)): | |
| 667 # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
| 668 self.subprocess_check_call( | |
| 669 ["cp", os.path.realpath(index), dest + f".{index_ext}"] | |
| 670 ) | |
| 671 else: | |
| 672 # Can happen in exotic condition | |
| 673 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
| 674 # => no index generated by galaxy, but there might be one next to the symlink target | |
| 675 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
| 676 if os.path.exists(os.path.realpath(data) + f".{index_ext}"): | |
| 677 self.symlink_or_copy( | |
| 678 os.path.realpath(data) + f".{index_ext}", dest + f".{index_ext}" | |
| 679 ) | |
| 680 else: | |
| 681 log.warn( | |
| 682 f"Could not find a bam index (.{index_ext} file) for {data}" | |
| 683 ) | |
| 684 | |
| 685 style_json = self._prepare_track_style(trackData) | |
| 686 | |
| 687 track_metadata = self._prepare_track_metadata(trackData) | |
| 688 | |
| 689 style_json.update(track_metadata) | |
| 690 | |
| 691 self._add_track( | |
| 692 trackData["label"], | |
| 693 trackData["key"], | |
| 694 trackData["category"], | |
| 695 rel_dest, | |
| 696 parent, | |
| 697 config=style_json, | |
| 698 remote=trackData['remote'] | |
| 699 ) | |
| 700 | |
    def add_vcf(self, parent, data, trackData, vcfOpts={}, zipped=False, **kwargs):
        """Add a VCF variant track.

        Local plain VCFs are copied, bgzipped and tabix-indexed; local
        already-zipped VCFs are copied as-is; remote ones are referenced
        by url.
        """
        if trackData['remote']:
            rel_dest = data
        else:
            if zipped:
                rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")
                dest = os.path.join(self.outdir, rel_dest)
                shutil.copy(os.path.realpath(data), dest)
            else:
                rel_dest = os.path.join("data", trackData["label"] + ".vcf")
                dest = os.path.join(self.outdir, rel_dest)
                shutil.copy(os.path.realpath(data), dest)

                # Compress and index the plain VCF in place.
                cmd = ["bgzip", dest]
                self.subprocess_check_call(cmd)
                cmd = ["tabix", dest + ".gz"]
                self.subprocess_check_call(cmd)

                # Point the track at the bgzipped copy.
                rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz")

        style_json = self._prepare_track_style(trackData)

        formatdetails = self._prepare_format_details(trackData)

        style_json.update(formatdetails)

        track_metadata = self._prepare_track_metadata(trackData)

        style_json.update(track_metadata)

        self._add_track(
            trackData["label"],
            trackData["key"],
            trackData["category"],
            rel_dest,
            parent,
            config=style_json,
            remote=trackData['remote']
        )
| 740 | |
| 741 def add_gff(self, parent, data, format, trackData, gffOpts, **kwargs): | |
| 742 if trackData['remote']: | |
| 743 rel_dest = data | |
| 744 else: | |
| 745 rel_dest = os.path.join("data", trackData["label"] + ".gff") | |
| 746 dest = os.path.join(self.outdir, rel_dest) | |
| 747 rel_dest = rel_dest + ".gz" | |
| 748 | |
| 749 self._sort_gff(data, dest) | |
| 750 | |
| 751 style_json = self._prepare_track_style(trackData) | |
| 752 | |
| 753 formatdetails = self._prepare_format_details(trackData) | |
| 754 | |
| 755 style_json.update(formatdetails) | |
| 756 | |
| 757 track_metadata = self._prepare_track_metadata(trackData) | |
| 758 | |
| 759 style_json.update(track_metadata) | |
| 760 | |
| 761 if gffOpts.get('index', 'false') in ("yes", "true", "True"): | |
| 762 if parent['uniq_id'] not in self.tracksToIndex: | |
| 763 self.tracksToIndex[parent['uniq_id']] = [] | |
| 764 self.tracksToIndex[parent['uniq_id']].append(trackData["label"]) | |
| 765 | |
| 766 self._add_track( | |
| 767 trackData["label"], | |
| 768 trackData["key"], | |
| 769 trackData["category"], | |
| 770 rel_dest, | |
| 771 parent, | |
| 772 config=style_json, | |
| 773 remote=trackData['remote'] | |
| 774 ) | |
| 775 | |
| 776 def add_bed(self, parent, data, format, trackData, gffOpts, **kwargs): | |
| 777 if trackData['remote']: | |
| 778 rel_dest = data | |
| 779 else: | |
| 780 rel_dest = os.path.join("data", trackData["label"] + ".bed") | |
| 781 dest = os.path.join(self.outdir, rel_dest) | |
| 782 rel_dest = rel_dest + ".gz" | |
| 783 | |
| 784 self._sort_bed(data, dest) | |
| 785 | |
| 786 style_json = self._prepare_track_style(trackData) | |
| 787 | |
| 788 formatdetails = self._prepare_format_details(trackData) | |
| 789 | |
| 790 style_json.update(formatdetails) | |
| 791 | |
| 792 track_metadata = self._prepare_track_metadata(trackData) | |
| 793 | |
| 794 style_json.update(track_metadata) | |
| 795 | |
| 796 if gffOpts.get('index', 'false') in ("yes", "true", "True"): | |
| 797 if parent['uniq_id'] not in self.tracksToIndex: | |
| 798 self.tracksToIndex[parent['uniq_id']] = [] | |
| 799 self.tracksToIndex[parent['uniq_id']].append(trackData["label"]) | |
| 800 | |
| 801 self._add_track( | |
| 802 trackData["label"], | |
| 803 trackData["key"], | |
| 804 trackData["category"], | |
| 805 rel_dest, | |
| 806 parent, | |
| 807 config=style_json, | |
| 808 remote=trackData['remote'] | |
| 809 ) | |
| 810 | |
    def add_paf(self, parent, data, trackData, pafOpts, **kwargs):
        """Add a pairwise alignment (PAF/PIF) SyntenyTrack.

        With a LinearBasicDisplay style the track is written directly into
        config.json; otherwise it is queued in self.synteny_tracks for the
        synteny viewer, with a placeholder for the second assembly name.
        """
        if trackData['remote']:
            rel_dest = data

            # Remote file: guess the adapter type from the url extension.
            if rel_dest.endswith('pif') or rel_dest.endswith('pif.gz'):
                adapter = "pif"
            else:
                adapter = "paf"
        else:
            rel_dest = os.path.join("data", trackData["label"] + ".pif.gz")
            dest = os.path.join(self.outdir, rel_dest)

            # Convert the local PAF into a sorted, indexed PIF file.
            cmd = ["jbrowse", "make-pif", "--out", dest, os.path.realpath(data)]
            self.subprocess_check_call(cmd)

            adapter = "pif"

        if trackData["style"]["display"] == "LinearBasicDisplay":
            # Normal style track
            # NOTE(review): this branch always assumes a PIF adapter, even when
            # a remote url ends in plain .paf — confirm intended.

            json_track_data = {
                "type": "SyntenyTrack",
                "trackId": trackData["label"],
                "name": trackData["key"],
                "adapter": {
                    "type": "PairwiseIndexedPAFAdapter",
                    "pifGzLocation": {
                        "uri": rel_dest,
                    },
                    "index": {
                        "location": {
                            "uri": rel_dest + ".tbi",
                        }
                    },
                },
                "category": [trackData["category"]],
                "assemblyNames": [parent['uniq_id']],
            }
        else:
            # Synteny viewer

            json_track_data = {
                "type": "SyntenyTrack",
                "trackId": trackData["label"],
                "name": trackData["key"],
                "adapter": {
                    "assemblyNames": [
                        parent['uniq_id'],
                        "",  # Placeholder until we know the next genome id
                    ],
                },
                "category": [trackData["category"]],
                "assemblyNames": [
                    parent['uniq_id'],
                    "",  # Placeholder until we know the next genome id
                ]
            }

            # Fill in the adapter depending on the detected file format.
            if adapter == "pif":
                json_track_data["adapter"].update({
                    "type": "PairwiseIndexedPAFAdapter",
                    "pifGzLocation": {
                        "uri": rel_dest,
                    },
                    "index": {
                        "location": {
                            "uri": rel_dest + ".tbi",
                        }
                    },
                })
            else:
                json_track_data["adapter"].update({
                    "type": "PAFAdapter",
                    "pafLocation": {
                        "uri": rel_dest,
                    },
                })

        style_json = self._prepare_track_style(trackData)

        json_track_data.update(style_json)

        track_metadata = self._prepare_track_metadata(trackData)

        json_track_data.update(track_metadata)

        if trackData["style"]["display"] == "LinearBasicDisplay":
            # Standalone track: register it with the jbrowse CLI now.
            self.subprocess_check_call(
                [
                    "jbrowse",
                    "add-track-json",
                    "--target",
                    self.outdir,
                    json.dumps(json_track_data),
                ]
            )
        else:
            # Deferred: the synteny viewer assembles these later.
            self.synteny_tracks.append(json_track_data)
| 910 | |
| 911 def add_hic(self, parent, data, trackData, hicOpts, **kwargs): | |
| 912 if trackData['remote']: | |
| 913 rel_dest = data | |
| 914 else: | |
| 915 rel_dest = os.path.join("data", trackData["label"] + ".hic") | |
| 916 dest = os.path.join(self.outdir, rel_dest) | |
| 917 self.symlink_or_copy(os.path.realpath(data), dest) | |
| 918 | |
| 919 style_json = self._prepare_track_style(trackData) | |
| 920 | |
| 921 track_metadata = self._prepare_track_metadata(trackData) | |
| 922 | |
| 923 style_json.update(track_metadata) | |
| 924 | |
| 925 self._add_track( | |
| 926 trackData["label"], | |
| 927 trackData["key"], | |
| 928 trackData["category"], | |
| 929 rel_dest, | |
| 930 parent, | |
| 931 config=style_json, | |
| 932 remote=trackData['remote'] | |
| 933 ) | |
| 934 | |
| 935 def add_maf(self, parent, data, trackData, mafOpts, **kwargs): | |
| 936 | |
| 937 # Add needed plugin | |
| 938 plugin_def = { | |
| 939 "name": "MafViewer", | |
| 940 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js" | |
| 941 } | |
| 942 self.plugins.append(plugin_def) | |
| 943 | |
| 944 rel_dest = os.path.join("data", trackData["label"] + ".maf") | |
| 945 dest = os.path.join(self.outdir, rel_dest) | |
| 946 | |
| 947 assembly_name = mafOpts.get("assembly_name", "") | |
| 948 if not assembly_name: | |
| 949 # Guess from assembly | |
| 950 assembly_name = parent['uniq_id'] | |
| 951 | |
| 952 self._convert_maf(data, dest, assembly_name) | |
| 953 | |
| 954 # Extract samples list | |
| 955 mafs = open(data, "r").readlines() | |
| 956 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))] | |
| 957 samp = [x.split()[1] for x in mafss if len(x.split()) > 0] | |
| 958 sampu = list(dict.fromkeys(samp)) | |
| 959 samples = [x.split(".")[0] for x in sampu] | |
| 960 samples.sort() | |
| 961 | |
| 962 json_track_data = { | |
| 963 "type": "MafTrack", | |
| 964 "trackId": trackData["label"], | |
| 965 "name": trackData["key"], | |
| 966 "adapter": { | |
| 967 "type": "MafTabixAdapter", | |
| 968 "samples": samples, | |
| 969 "bedGzLocation": { | |
| 970 "uri": rel_dest + ".gz", | |
| 971 }, | |
| 972 "index": { | |
| 973 "location": { | |
| 974 "uri": rel_dest + ".gz.tbi", | |
| 975 }, | |
| 976 }, | |
| 977 }, | |
| 978 "category": [trackData["category"]], | |
| 979 "assemblyNames": [parent['uniq_id']], | |
| 980 } | |
| 981 | |
| 982 style_json = self._prepare_track_style(trackData) | |
| 983 | |
| 984 json_track_data.update(style_json) | |
| 985 | |
| 986 track_metadata = self._prepare_track_metadata(trackData) | |
| 987 | |
| 988 json_track_data.update(track_metadata) | |
| 989 | |
| 990 self.subprocess_check_call( | |
| 991 [ | |
| 992 "jbrowse", | |
| 993 "add-track-json", | |
| 994 "--target", | |
| 995 self.outdir, | |
| 996 json.dumps(json_track_data), | |
| 997 ] | |
| 998 ) | |
| 999 | |
| 1000 def add_sparql(self, parent, url, query, query_refnames, trackData): | |
| 1001 json_track_data = { | |
| 1002 "type": "FeatureTrack", | |
| 1003 "trackId": trackData["label"], | |
| 1004 "name": trackData["key"], | |
| 1005 "adapter": { | |
| 1006 "type": "SPARQLAdapter", | |
| 1007 "endpoint": {"uri": url, "locationType": "UriLocation"}, | |
| 1008 "queryTemplate": query, | |
| 1009 }, | |
| 1010 "category": [trackData["category"]], | |
| 1011 "assemblyNames": [parent['uniq_id']], | |
| 1012 } | |
| 1013 | |
| 1014 if query_refnames: | |
| 1015 json_track_data["adapter"]["refNamesQueryTemplate"]: query_refnames | |
| 1016 | |
| 1017 # TODO handle metadata somehow for sparql too | |
| 1018 | |
| 1019 self.subprocess_check_call( | |
| 1020 [ | |
| 1021 "jbrowse", | |
| 1022 "add-track-json", | |
| 1023 "--target", | |
| 1024 self.outdir, | |
| 1025 json.dumps(json_track_data), | |
| 1026 ] | |
| 1027 ) | |
| 1028 | |
| 1029 def _add_track(self, track_id, label, category, path, assembly, config=None, trackType=None, load_action="inPlace", assemblies=None, remote=False): | |
| 1030 """ | |
| 1031 Adds a track to config.json using Jbrowse add-track cli | |
| 1032 | |
| 1033 By default, using `--load inPlace`: the file is supposed to be already placed at the `path` relative to | |
| 1034 the outdir, `jbrowse add-track` will not touch it and trust us that the file is there and ready to use. | |
| 1035 | |
| 1036 With `load_action` parameter, you can ask `jbrowse add-track` to copy/move/symlink the file for you. | |
| 1037 Not done by default because we often need more control on file copying/symlink for specific cases (indexes, symlinks of symlinks, ...) | |
| 1038 """ | |
| 1039 | |
| 1040 cmd = [ | |
| 1041 "jbrowse", | |
| 1042 "add-track", | |
| 1043 "--name", | |
| 1044 label, | |
| 1045 "--category", | |
| 1046 category, | |
| 1047 "--target", | |
| 1048 self.outdir, | |
| 1049 "--trackId", | |
| 1050 track_id, | |
| 1051 "--assemblyNames", | |
| 1052 assemblies if assemblies else assembly['uniq_id'], | |
| 1053 ] | |
| 1054 | |
| 1055 if not remote: | |
| 1056 cmd.append("--load") | |
| 1057 cmd.append(load_action) | |
| 1058 | |
| 1059 if config: | |
| 1060 cmd.append("--config") | |
| 1061 cmd.append(json.dumps(config)) | |
| 1062 | |
| 1063 if trackType: | |
| 1064 cmd.append("--trackType") | |
| 1065 cmd.append(trackType) | |
| 1066 | |
| 1067 cmd.append(path) | |
| 1068 | |
| 1069 self.subprocess_check_call(cmd) | |
| 1070 | |
| 1071 def _sort_gff(self, data, dest): | |
| 1072 # Only index if not already done | |
| 1073 if not os.path.exists(dest): | |
| 1074 # Not using jbrowse sort-gff because it uses sort and has the problem exposed on https://github.com/tao-bioinfo/gff3sort | |
| 1075 cmd = f"gff3sort.pl --precise '{data}' | grep -v \"^$\" > '{dest}'" | |
| 1076 self.subprocess_popen(cmd, cwd=False) | |
| 1077 | |
| 1078 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False) | |
| 1079 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"], cwd=False) | |
| 1080 | |
| 1081 def _sort_bed(self, data, dest): | |
| 1082 # Only index if not already done | |
| 1083 if not os.path.exists(dest): | |
| 1084 cmd = ["sort", "-k1,1", "-k2,2n", data] | |
| 1085 with open(dest, "w") as handle: | |
| 1086 self.subprocess_check_call(cmd, output=handle) | |
| 1087 | |
| 1088 self.subprocess_check_call(["bgzip", "-f", dest]) | |
| 1089 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"]) | |
| 1090 | |
| 1091 def _convert_maf(self, data, dest, assembly_name): | |
| 1092 # Only convert if not already done | |
| 1093 if not os.path.exists(dest): | |
| 1094 | |
| 1095 dest_bed = dest + ".bed" | |
| 1096 cmd = ["python", os.path.join(SELF_LOCATION, "maf2bed.py"), assembly_name, data, dest_bed] | |
| 1097 self.subprocess_check_call(cmd, cwd=False) | |
| 1098 | |
| 1099 cmd = ["sort", "-k1,1", "-k2,2n", dest_bed] | |
| 1100 with open(dest, "w") as handle: | |
| 1101 self.subprocess_check_call(cmd, output=handle) | |
| 1102 | |
| 1103 self.subprocess_check_call(["bgzip", "-f", dest], cwd=False) | |
| 1104 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"], cwd=False) | |
| 1105 | |
| 1106 def process_annotations(self, track, parent): | |
| 1107 category = track["category"].replace("__pd__date__pd__", TODAY) | |
| 1108 | |
| 1109 track_labels = [] | |
| 1110 | |
| 1111 for i, ( | |
| 1112 dataset_path, | |
| 1113 dataset_ext, | |
| 1114 track_human_label, | |
| 1115 extra_metadata, | |
| 1116 ) in enumerate(track["trackfiles"]): | |
| 1117 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) | |
| 1118 for key, value in mapped_chars.items(): | |
| 1119 track_human_label = track_human_label.replace(value, key) | |
| 1120 | |
| 1121 is_multi = type(dataset_path) is list | |
| 1122 | |
| 1123 log.info( | |
| 1124 f"-----> Processing track {category} / {track_human_label} ({dataset_ext}, {len(dataset_path) if is_multi else 1} files)" | |
| 1125 ) | |
| 1126 | |
| 1127 outputTrackConfig = { | |
| 1128 "category": category, | |
| 1129 } | |
| 1130 | |
| 1131 outputTrackConfig["key"] = track_human_label | |
| 1132 # We add extra data to hash for the case of non-file tracks | |
| 1133 if ( | |
| 1134 "conf" in track | |
| 1135 and "options" in track["conf"] | |
| 1136 and "url" in track["conf"]["options"] | |
| 1137 ): | |
| 1138 non_file_info = track["conf"]["options"]["url"] | |
| 1139 else: | |
| 1140 non_file_info = "" | |
| 1141 | |
| 1142 # I chose to use track['category'] instead of 'category' here. This | |
| 1143 # is intentional. This way re-running the tool on a different date | |
| 1144 # will not generate different hashes and make comparison of outputs | |
| 1145 # much simpler. | |
| 1146 hashData = [ | |
| 1147 str(dataset_path), | |
| 1148 track_human_label, | |
| 1149 track["category"], | |
| 1150 non_file_info, | |
| 1151 parent["uniq_id"], | |
| 1152 ] | |
| 1153 hashData = "|".join(hashData).encode("utf-8") | |
| 1154 outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + f"_{track['track_num']}_{i}" | |
| 1155 outputTrackConfig["metadata"] = extra_metadata | |
| 1156 | |
| 1157 outputTrackConfig["style"] = track["style"] | |
| 1158 | |
| 1159 outputTrackConfig["formatdetails"] = track["formatdetails"] | |
| 1160 | |
| 1161 outputTrackConfig["remote"] = track["remote"] | |
| 1162 | |
| 1163 # Guess extension for remote data | |
| 1164 if dataset_ext == "gff,gff3,bed": | |
| 1165 if dataset_path.endswith(".bed") or dataset_path.endswith(".bed.gz"): | |
| 1166 dataset_ext = "bed" | |
| 1167 else: | |
| 1168 dataset_ext = "gff" | |
| 1169 elif dataset_ext == "vcf,vcf_bgzip": | |
| 1170 if dataset_path.endswith(".vcf.gz"): | |
| 1171 dataset_ext = "vcf_bgzip" | |
| 1172 else: | |
| 1173 dataset_ext = "vcf" | |
| 1174 | |
| 1175 if dataset_ext in ("gff", "gff3"): | |
| 1176 self.add_gff( | |
| 1177 parent, | |
| 1178 dataset_path, | |
| 1179 dataset_ext, | |
| 1180 outputTrackConfig, | |
| 1181 track["conf"]["options"]["gff"], | |
| 1182 ) | |
| 1183 elif dataset_ext == "bed": | |
| 1184 self.add_bed( | |
| 1185 parent, | |
| 1186 dataset_path, | |
| 1187 dataset_ext, | |
| 1188 outputTrackConfig, | |
| 1189 track["conf"]["options"]["gff"], | |
| 1190 ) | |
| 1191 elif dataset_ext == "bigwig": | |
| 1192 if is_multi: | |
| 1193 self.add_bigwig_multi( | |
| 1194 parent, | |
| 1195 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"] | |
| 1196 ) | |
| 1197 else: | |
| 1198 self.add_bigwig( | |
| 1199 parent, | |
| 1200 dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"] | |
| 1201 ) | |
| 1202 elif dataset_ext == "maf": | |
| 1203 self.add_maf( | |
| 1204 parent, | |
| 1205 dataset_path, outputTrackConfig, track["conf"]["options"]["maf"] | |
| 1206 ) | |
| 1207 elif dataset_ext == "bam": | |
| 1208 | |
| 1209 if track["remote"]: | |
| 1210 bam_index = dataset_path + '.bai' | |
| 1211 else: | |
| 1212 real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][ | |
| 1213 "bam_index" | |
| 1214 ] | |
| 1215 if not isinstance(real_indexes, list): | |
| 1216 # <bam_indices> | |
| 1217 # <bam_index>/path/to/a.bam.bai</bam_index> | |
| 1218 # </bam_indices> | |
| 1219 # | |
| 1220 # The above will result in the 'bam_index' key containing a | |
| 1221 # string. If there are two or more indices, the container | |
| 1222 # becomes a list. Fun! | |
| 1223 real_indexes = [real_indexes] | |
| 1224 | |
| 1225 bam_index = real_indexes[i] | |
| 1226 | |
| 1227 self.add_xam( | |
| 1228 parent, | |
| 1229 dataset_path, | |
| 1230 outputTrackConfig, | |
| 1231 track["conf"]["options"]["pileup"], | |
| 1232 index=bam_index, | |
| 1233 ext="bam", | |
| 1234 ) | |
| 1235 elif dataset_ext == "cram": | |
| 1236 | |
| 1237 if track["remote"]: | |
| 1238 cram_index = dataset_path + '.crai' | |
| 1239 else: | |
| 1240 real_indexes = track["conf"]["options"]["cram"]["cram_indices"][ | |
| 1241 "cram_index" | |
| 1242 ] | |
| 1243 if not isinstance(real_indexes, list): | |
| 1244 # <bam_indices> | |
| 1245 # <bam_index>/path/to/a.bam.bai</bam_index> | |
| 1246 # </bam_indices> | |
| 1247 # | |
| 1248 # The above will result in the 'bam_index' key containing a | |
| 1249 # string. If there are two or more indices, the container | |
| 1250 # becomes a list. Fun! | |
| 1251 real_indexes = [real_indexes] | |
| 1252 | |
| 1253 cram_index = real_indexes[i] | |
| 1254 | |
| 1255 self.add_xam( | |
| 1256 parent, | |
| 1257 dataset_path, | |
| 1258 outputTrackConfig, | |
| 1259 track["conf"]["options"]["cram"], | |
| 1260 index=cram_index, | |
| 1261 ext="cram", | |
| 1262 ) | |
| 1263 elif dataset_ext == "vcf": | |
| 1264 self.add_vcf( | |
| 1265 parent, | |
| 1266 dataset_path, | |
| 1267 outputTrackConfig | |
| 1268 ) | |
| 1269 elif dataset_ext == "vcf_bgzip": | |
| 1270 self.add_vcf( | |
| 1271 parent, | |
| 1272 dataset_path, | |
| 1273 outputTrackConfig, | |
| 1274 zipped=True | |
| 1275 ) | |
| 1276 elif dataset_ext == "paf": # https://fr.wikipedia.org/wiki/Paf_le_chien | |
| 1277 self.add_paf( | |
| 1278 parent, | |
| 1279 dataset_path, | |
| 1280 outputTrackConfig, | |
| 1281 track["conf"]["options"]["synteny"] | |
| 1282 ) | |
| 1283 elif dataset_ext in ("hic"): | |
| 1284 self.add_hic( | |
| 1285 parent, | |
| 1286 dataset_path, | |
| 1287 outputTrackConfig, | |
| 1288 track["conf"]["options"]["hic"] | |
| 1289 ) | |
| 1290 elif dataset_ext == "sparql": | |
| 1291 sparql_query = track["conf"]["options"]["sparql"]["query"] | |
| 1292 for key, value in mapped_chars.items(): | |
| 1293 sparql_query = sparql_query.replace(value, key) | |
| 1294 sparql_query_refnames = track["conf"]["options"]["sparql"].get("query_refnames", "") | |
| 1295 if sparql_query_refnames: | |
| 1296 for key, value in mapped_chars.items(): | |
| 1297 sparql_query_refnames = sparql_query_refnames.replace(value, key) | |
| 1298 self.add_sparql( | |
| 1299 parent, | |
| 1300 track["conf"]["options"]["sparql"]["url"], | |
| 1301 sparql_query, | |
| 1302 sparql_query_refnames, | |
| 1303 outputTrackConfig, | |
| 1304 ) | |
| 1305 elif dataset_ext == "gc": | |
| 1306 self.add_gc_content( | |
| 1307 parent, | |
| 1308 outputTrackConfig, | |
| 1309 ) | |
| 1310 else: | |
| 1311 log.error(f"Do not know how to handle {dataset_ext}") | |
| 1312 | |
| 1313 track_labels.append(outputTrackConfig["label"]) | |
| 1314 | |
| 1315 # Return non-human label for use in other fields | |
| 1316 return track_labels | |
| 1317 | |
| 1318 def add_default_view_genome(self, genome, default_loc, tracks_on): | |
| 1319 | |
| 1320 refName = "" | |
| 1321 start = end = None | |
| 1322 if default_loc: | |
| 1323 loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", default_loc) | |
| 1324 if loc_match: | |
| 1325 refName = loc_match.group(1) | |
| 1326 start = int(loc_match.group(2)) | |
| 1327 end = int(loc_match.group(3)) | |
| 1328 | |
| 1329 if not refName and self.assembly_ids[genome['uniq_id']]: | |
| 1330 refName = self.assembly_ids[genome['uniq_id']] | |
| 1331 | |
| 1332 if start and end: | |
| 1333 loc_str = f"{refName}:{start}-{end}" | |
| 1334 else: | |
| 1335 loc_str = refName | |
| 1336 | |
| 1337 # Updating an existing jbrowse instance, merge with pre-existing view | |
| 1338 view_specs = None | |
| 1339 if self.update: | |
| 1340 for existing in self.default_views.values(): | |
| 1341 if len(existing) and existing["type"] == "LinearGenomeView": | |
| 1342 if existing['init']['assembly'] == genome['uniq_id']: | |
| 1343 view_specs = existing | |
| 1344 if loc_str: | |
| 1345 view_specs['init']['loc'] = loc_str | |
| 1346 view_specs['init']['tracks'].extend(tracks_on) | |
| 1347 | |
| 1348 if view_specs is None: # Not updating, or updating from synteny | |
| 1349 view_specs = { | |
| 1350 "type": "LinearGenomeView", | |
| 1351 "init": { | |
| 1352 "assembly": genome['uniq_id'], | |
| 1353 "loc": loc_str, | |
| 1354 "tracks": tracks_on | |
| 1355 } | |
| 1356 } | |
| 1357 | |
| 1358 return view_specs | |
| 1359 | |
| 1360 def add_default_view_synteny(self, genome_views, synteny_tracks): | |
| 1361 | |
| 1362 # Add json for cached synteny tracks | |
| 1363 # We cache them because we need to know the target genome uniq_id | |
| 1364 for strack in synteny_tracks: | |
| 1365 | |
| 1366 # Target assembly is the next genome, find its uniq_id | |
| 1367 query_assembly = strack["assemblyNames"][0] | |
| 1368 ass_uniq_ids = list(self.assembly_ids.keys()) | |
| 1369 query_index = ass_uniq_ids.index(query_assembly) | |
| 1370 target_assembly = ass_uniq_ids[query_index + 1] | |
| 1371 | |
| 1372 strack["assemblyNames"][1] = target_assembly | |
| 1373 strack["adapter"]["assemblyNames"][1] = target_assembly | |
| 1374 | |
| 1375 self.subprocess_check_call( | |
| 1376 [ | |
| 1377 "jbrowse", | |
| 1378 "add-track-json", | |
| 1379 "--target", | |
| 1380 self.outdir, | |
| 1381 json.dumps(strack), | |
| 1382 ] | |
| 1383 ) | |
| 1384 | |
| 1385 # Configure the synteny view | |
| 1386 levels = [] | |
| 1387 | |
| 1388 for strack in synteny_tracks: | |
| 1389 lev = { | |
| 1390 "type": "LinearSyntenyViewHelper", | |
| 1391 "tracks": [ | |
| 1392 { | |
| 1393 "type": "SyntenyTrack", | |
| 1394 "configuration": strack["trackId"], | |
| 1395 "displays": [ | |
| 1396 { | |
| 1397 "type": "LinearSyntenyDisplay", | |
| 1398 "configuration": strack["trackId"] + "_LinearSyntenyDisplay" | |
| 1399 } | |
| 1400 ] | |
| 1401 } | |
| 1402 ], | |
| 1403 "height": 100, | |
| 1404 "level": len(levels) | |
| 1405 } | |
| 1406 levels.append(lev) | |
| 1407 | |
| 1408 view_specs = { | |
| 1409 "type": "LinearSyntenyView", | |
| 1410 "views": genome_views, | |
| 1411 "levels": levels | |
| 1412 } | |
| 1413 | |
| 1414 return view_specs | |
| 1415 | |
| 1416 def add_default_session(self, default_views): | |
| 1417 """ | |
| 1418 Add some default session settings: set some assemblies/tracks on/off | |
| 1419 | |
| 1420 This allows to select a default view: | |
| 1421 - jb type (Linear, Circular, etc) | |
| 1422 - default location on an assembly | |
| 1423 - default tracks | |
| 1424 - ... | |
| 1425 | |
| 1426 Now using this method: | |
| 1427 https://github.com/GMOD/jbrowse-components/pull/4907 | |
| 1428 | |
| 1429 Different methods that were tested/discussed earlier: | |
| 1430 - using a defaultSession item in config.json before PR 4970: this proved to be difficult: | |
| 1431 forced to write a full session block, including hard-coded/hard-to-guess items, | |
| 1432 no good way to let Jbrowse2 display a scaffold without knowing its size | |
| 1433 - using JBrowse2 as an embedded React component in a tool-generated html file: | |
| 1434 it works but it requires generating js code to actually do what we want = chosing default view, assembly, tracks, ... | |
| 1435 - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below) | |
| 1436 a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session | |
| 1437 - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe | |
| 1438 | |
| 1439 Xrefs to understand the choices: | |
| 1440 https://github.com/GMOD/jbrowse-components/issues/2708 | |
| 1441 https://github.com/GMOD/jbrowse-components/discussions/3568 | |
| 1442 https://github.com/GMOD/jbrowse-components/pull/4148 | |
| 1443 """ | |
| 1444 | |
| 1445 if self.use_synteny_viewer: | |
| 1446 session_name = "Synteny" | |
| 1447 else: | |
| 1448 session_name = ', '.join(x['init']['assembly'] for x in default_views) | |
| 1449 | |
| 1450 session_spec = { | |
| 1451 "name": session_name, | |
| 1452 "views": default_views | |
| 1453 } | |
| 1454 | |
| 1455 config_path = os.path.join(self.outdir, "config.json") | |
| 1456 with open(config_path, "r") as config_file: | |
| 1457 config_json = json.load(config_file) | |
| 1458 | |
| 1459 config_json["defaultSession"].update(session_spec) | |
| 1460 | |
| 1461 with open(config_path, "w") as config_file: | |
| 1462 json.dump(config_json, config_file, indent=2) | |
| 1463 | |
| 1464 def add_general_configuration(self, data): | |
| 1465 """ | |
| 1466 Add some general configuration to the config.json file | |
| 1467 """ | |
| 1468 | |
| 1469 config_path = os.path.join(self.outdir, "config.json") | |
| 1470 with open(config_path, "r") as config_file: | |
| 1471 config_json = json.load(config_file) | |
| 1472 | |
| 1473 config_data = {} | |
| 1474 | |
| 1475 config_data["disableAnalytics"] = data.get("analytics", "false") == "true" | |
| 1476 | |
| 1477 config_data["theme"] = { | |
| 1478 "palette": { | |
| 1479 "primary": {"main": data.get("primary_color", "#0D233F")}, | |
| 1480 "secondary": {"main": data.get("secondary_color", "#721E63")}, | |
| 1481 "tertiary": {"main": data.get("tertiary_color", "#135560")}, | |
| 1482 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, | |
| 1483 }, | |
| 1484 "typography": {"fontSize": int(data.get("font_size", 10))}, | |
| 1485 } | |
| 1486 | |
| 1487 config_json["configuration"].update(config_data) | |
| 1488 | |
| 1489 with open(config_path, "w") as config_file: | |
| 1490 json.dump(config_json, config_file, indent=2) | |
| 1491 | |
| 1492 def add_plugins(self, data): | |
| 1493 """ | |
| 1494 Add plugins to the config.json file | |
| 1495 """ | |
| 1496 | |
| 1497 config_path = os.path.join(self.outdir, "config.json") | |
| 1498 with open(config_path, "r") as config_file: | |
| 1499 config_json = json.load(config_file) | |
| 1500 | |
| 1501 if "plugins" not in config_json: | |
| 1502 config_json["plugins"] = [] | |
| 1503 | |
| 1504 config_json["plugins"].extend(data) | |
| 1505 | |
| 1506 with open(config_path, "w") as config_file: | |
| 1507 json.dump(config_json, config_file, indent=2) | |
| 1508 | |
| 1509 def clone_jbrowse(self, jbrowse_dir, destination): | |
| 1510 """ | |
| 1511 Clone a JBrowse directory into a destination directory. | |
| 1512 | |
| 1513 Not using `jbrowse create` command to allow running on internet-less compute + to make sure code is frozen | |
| 1514 """ | |
| 1515 | |
| 1516 copytree(jbrowse_dir, destination) | |
| 1517 try: | |
| 1518 shutil.rmtree(os.path.join(destination, "test_data")) | |
| 1519 except OSError as e: | |
| 1520 log.error(f"Error: {e.filename} - {e.strerror}.") | |
| 1521 | |
| 1522 if not os.path.exists(os.path.join(destination, "data")): | |
| 1523 # It can already exist if upgrading an instance | |
| 1524 os.makedirs(os.path.join(destination, "data")) | |
| 1525 log.info(f"makedir {os.path.join(destination, 'data')}") | |
| 1526 | |
| 1527 os.symlink("./data/config.json", os.path.join(destination, "config.json")) | |
| 1528 | |
| 1529 | |
def copytree(src, dst, symlinks=False, ignore=None):
    """Copy every entry of directory *src* into the existing directory *dst*."""
    for entry in os.listdir(src):
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if os.path.isdir(src_path):
            # Sub-directories are copied recursively as whole trees
            shutil.copytree(src_path, dst_path, symlinks, ignore)
        else:
            shutil.copy2(src_path, dst_path)
| 1538 | |
| 1539 | |
def parse_style_conf(item):
    """
    Convert a style XML node into a Python value based on its `type`
    attribute: bool for "boolean", int for "integer", raw text otherwise.
    """
    declared_type = item.attrib.get("type")
    if declared_type == "boolean":
        return item.text in ("yes", "true", "True")
    if declared_type == "integer":
        return int(item.text)
    return item.text
| 1548 | |
| 1549 | |
def validate_synteny(real_root):
    """
    Decide whether the synteny viewer should be used and validate the layout.

    Returns False when no synteny track exists at all. Otherwise checks that
    the last assembly does not carry a LinearSyntenyDisplay synteny track
    (its target would be a non-existent "next" genome), and that each other
    assembly carries exactly one. Raises RuntimeError on invalid layouts.
    """

    if len(real_root.findall('assembly/tracks/track[@format="synteny"]')) == 0:
        # No synteny data, all good
        return False

    assemblies = real_root.findall("assembly")

    last_display = assemblies[-1].find('tracks/track[@format="synteny"]/options/style/display')
    if len(assemblies[-1].findall('tracks/track[@format="synteny"]')) > 0 and \
            last_display is not None and last_display.text == "LinearSyntenyDisplay":
        raise RuntimeError("You should not set a synteny track on the last genome.")

    # BUGFIX: the original iterated over `assemblies[1:0]`, an always-empty
    # slice, so this validation never ran. Every assembly except the last
    # must have exactly one synteny track. Also guard find() returning None
    # (no synteny track at all) to avoid an AttributeError.
    for assembly in assemblies[:-1]:
        display = assembly.find('tracks/track[@format="synteny"]/options/style/display')
        if len(assembly.findall('tracks/track[@format="synteny"]')) != 1 and \
                display is not None and display.text == "LinearSyntenyDisplay":
            raise RuntimeError("To use the synteny viewer, you should add a synteny track to each assembly, except the last one.")

    return True
| 1568 | |
| 1569 | |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument("--update", help="Update an existing JBrowse2 instance", action="store_true")
    parser.add_argument("--outdir", help="Output directory", default="out")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    real_root = tree.getroot()

    # This should be done ASAP
    # Sometimes this comes as `localhost` without a protocol
    GALAXY_INFRASTRUCTURE_URL = real_root.find("metadata/galaxyUrl").text
    if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        update=args.update,
    )

    # Synteny options are special, check them first
    jc.use_synteny_viewer = validate_synteny(real_root)

    for assembly in real_root.findall("assembly"):
        genome_el = assembly.find('genome')

        is_remote = genome_el.attrib.get("remote", "false") == "true"

        genome = {
            "path": genome_el.attrib["path"] if is_remote else os.path.realpath(genome_el.attrib["path"]),
            "meta": metadata_from_node(genome_el.find("metadata")),
            "label": genome_el.attrib["label"],
        }

        cytobands = None
        cytobands_el = genome_el.find("cytobands")
        if cytobands_el is not None and "path" in cytobands_el.attrib:
            cytobands = cytobands_el.attrib["path"]

        ref_name_aliases = None
        ref_name_aliases_el = genome_el.find("ref_name_aliases")
        if ref_name_aliases_el is not None and "path" in ref_name_aliases_el.attrib:
            ref_name_aliases = ref_name_aliases_el.attrib["path"]

        # BUGFIX: the message lacked a %s placeholder, so passing `genome` as
        # an extra argument triggered a logging formatting error.
        log.debug("Processing genome %s", genome)
        genome["uniq_id"] = jc.add_assembly(genome["path"], genome["label"], is_remote, cytobands, ref_name_aliases)

        default_tracks_on = []

        track_num = 0
        for track in assembly.findall("tracks/track"):
            track_conf = {}
            track_conf["trackfiles"] = []
            track_conf["track_num"] = track_num

            trackfiles = track.findall("files/trackFile") or []

            is_multi = False
            multi_paths = []
            multi_type = None
            multi_metadata = {}
            try:
                multi_in_xml = track.find("options/multitrack")
                if multi_in_xml is not None and parse_style_conf(multi_in_xml):
                    is_multi = True
                    multi_paths = []
                    multi_type = trackfiles[0].attrib["ext"]
            except KeyError:
                pass

            is_remote = False
            if trackfiles:
                for x in trackfiles:
                    if is_multi:
                        is_remote = x.attrib.get("remote", "false") == "true"
                        multi_paths.append(
                            (x.attrib["label"], x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]))
                        )
                        multi_metadata.update(metadata_from_node(x.find("metadata")))
                    else:
                        metadata = metadata_from_node(x.find("metadata"))
                        is_remote = x.attrib.get("remote", "false") == "true"
                        track_conf["trackfiles"].append(
                            (
                                x.attrib["path"] if is_remote else os.path.realpath(x.attrib["path"]),
                                x.attrib["ext"],
                                x.attrib["label"],
                                metadata,
                            )
                        )
            else:
                # For tracks without files (sparql, gc)
                track_conf["trackfiles"].append(
                    (
                        "",  # N/A, no path for sparql or gc
                        track.attrib["format"],
                        track.find("options/label").text,
                        {},
                    )
                )

            if is_multi:
                etal_tracks_nb = len(multi_paths[1:])
                multi_label = f"{multi_paths[0][0]} + {etal_tracks_nb} other track{'s' if etal_tracks_nb > 1 else ''}"

                track_conf["trackfiles"].append(
                    (
                        multi_paths,  # Passing an array of paths to represent as one track
                        multi_type,  # First file type
                        multi_label,  # First file label
                        multi_metadata,  # Mix of all metadata for multiple bigwig => only last file metadata coming from galaxy + custom oness
                    )
                )
            track_conf["category"] = track.attrib["cat"]
            track_conf["format"] = track.attrib["format"]
            # BUGFIX: this style-parsing dict was built twice back to back;
            # keep a single copy.
            track_conf["style"] = {
                item.tag: parse_style_conf(item) for item in (track.find("options/style") or [])
            }

            track_conf["style_labels"] = {
                item.tag: parse_style_conf(item)
                for item in (track.find("options/style_labels") or [])
            }
            track_conf["formatdetails"] = {
                item.tag: parse_style_conf(item) for item in (track.find("options/formatdetails") or [])
            }

            track_conf["conf"] = etree_to_dict(track.find("options"))

            track_conf["remote"] = is_remote

            track_labels = jc.process_annotations(track_conf, genome)

            if track.attrib["visibility"] == "default_on":
                for tlabel in track_labels:
                    default_tracks_on.append(tlabel)

            track_num += 1

        default_loc = assembly.find("defaultLocation").text

        jc.default_views[genome['uniq_id']] = jc.add_default_view_genome(genome, default_loc, default_tracks_on)

    if jc.use_synteny_viewer:
        synteny_view = jc.add_default_view_synteny(list(jc.default_views.values()), jc.synteny_tracks)

        views_for_session = jc._load_old_synteny_views()

        views_for_session.append(synteny_view)
    else:
        old_views = jc._load_old_genome_views()

        for old_view in old_views:
            if old_view not in jc.default_views:
                jc.default_views[old_view] = old_views[old_view]

        views_for_session = list(jc.default_views.values())

    general_data = {
        "analytics": real_root.find("metadata/general/analytics").text,
        "primary_color": real_root.find("metadata/general/primary_color").text,
        "secondary_color": real_root.find("metadata/general/secondary_color").text,
        "tertiary_color": real_root.find("metadata/general/tertiary_color").text,
        "quaternary_color": real_root.find("metadata/general/quaternary_color").text,
        "font_size": real_root.find("metadata/general/font_size").text,
    }

    jc.add_default_session(views_for_session)
    jc.add_general_configuration(general_data)
    jc.add_plugins(jc.plugins)
    jc.text_index()
