annotate build_ucsc_custom_track.py @ 2:f1fd9e6e026e draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
author devteam
date Mon, 28 Feb 2022 20:05:39 +0000
parents 783448899c63
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
2 """
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
3 Build a UCSC genome browser custom track file
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
4 """
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
5
2
f1fd9e6e026e "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents: 0
diff changeset
6 import sys
0
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
7
2
f1fd9e6e026e "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents: 0
diff changeset
# Maps a dataset file type to the value written in the ``type=`` attribute of
# its track line.  A value of None means no ``type=`` attribute is written.
# File types missing from this mapping are handled as interval data.
FILE_TYPE_TO_TRACK_TYPE = {
    'bed': None,
    'bedstrict': None,
    'bed6': None,
    'bed12': None,
    'bedgraph': 'bedGraph',
    'wig': 'wiggle_0',
}
CHUNK_SIZE = 2**20  # 1 mb


def _quote_safe(text):
    """Return *text* made safe for use inside a double-quoted track attribute.

    Embedded double quotes would end the attribute value early and line
    breaks would split the single-line track header, so double quotes become
    single quotes and CR/LF characters become spaces.
    """
    return str(text).replace('"', "'").replace('\r', ' ').replace('\n', ' ')


def get_track_line_is_interval(file_type, name, description, color, visibility):
    """Build the ``track`` header line for one dataset.

    Args:
        file_type: Dataset type; looked up in ``FILE_TYPE_TO_TRACK_TYPE``.
        name: Track label, written as ``name="..."``.
        description: Track description, written as ``description="..."``.
        color: Value written verbatim after ``color=``.
        visibility: Value written verbatim after ``visibility=``.

    Returns:
        A ``(track_line, is_interval)`` tuple.  ``track_line`` ends with a
        newline.  ``is_interval`` is True only when ``file_type`` is not a
        key of ``FILE_TYPE_TO_TRACK_TYPE``.
    """
    is_interval = file_type not in FILE_TYPE_TO_TRACK_TYPE
    track_type = FILE_TYPE_TO_TRACK_TYPE.get(file_type)

    pieces = ['track ']
    if track_type:
        pieces.append(f"type={track_type} ")
    pieces.append(
        f'name="{_quote_safe(name)}" description="{_quote_safe(description)}" '
        f'color={color} visibility={visibility}\n'
    )
    return ''.join(pieces), is_interval
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
24
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
25
783448899c63 Imported from capsule None
devteam
parents:
diff changeset
def _parse_interval_columns(colspec):
    """Convert a comma-separated, 1-based column spec to 0-based indexes.

    Args:
        colspec: Text such as ``"1,2,3,6"`` (chrom, start, end, strand).

    Returns:
        A ``(chrom, start, end, strand)`` tuple of 0-based indexes.
        ``strand`` is -1 when the spec does not consist of exactly four
        integers; in that case only the first three entries are used.

    Exits the program with an error message when the first three entries
    are not all integers.
    """
    parts = colspec.split(",")
    try:
        c, s, e, st = (int(x) - 1 for x in parts)
    except ValueError:
        try:
            c, s, e = (int(x) - 1 for x in parts[:3])
        except ValueError:
            sys.exit("Columns in interval file invalid for UCSC custom track.")
        st = -1  # strand column is absent
    return c, s, e, st


def _write_interval_rows(in_file, out, columns):
    """Write the selected columns of each data line of *in_file* to *out*.

    Blank lines and lines starting with ``#`` are ignored.  A line that
    lacks one of the requested columns is skipped and counted.

    Returns:
        ``(skipped, first_invalid)``: the number of skipped lines and the
        1-based number of the first skipped line (0 when none was skipped).
    """
    c, s, e, st = columns
    skipped = 0
    first_invalid = 0
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split("\t")
        try:
            if st >= 0:
                # Strand column is present (index 0 is a valid column; only
                # -1 marks it as absent).  The name field is the line index.
                row = f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n"
            else:
                row = f"{fields[c]}\t{fields[s]}\t{fields[e]}\n"
        except IndexError:
            skipped += 1
            if not first_invalid:
                first_invalid = i + 1
            continue
        # Written outside the try so real I/O errors are not miscounted as
        # invalid input lines.
        out.write(row)
    return skipped, first_invalid


def main(argv=None):
    """Build a custom track file from one or more datasets.

    Args:
        argv: Argument list (defaults to ``sys.argv[1:]``).  The first item
            is the output file name.  It is followed by groups of seven
            items, one group per dataset: input file name, file type,
            column spec, name, description, color (``-`` separated) and
            visibility.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    if not args or (len(args) - 1) % 7:
        sys.exit(
            "Usage: build_ucsc_custom_track.py OUT_FILE "
            "[IN_FILE FILE_TYPE COLSPEC NAME DESCRIPTION COLOR VISIBILITY]..."
        )

    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0
    out_fname = args.pop(0)
    with open(out_fname, "w") as out:
        while args:
            # Take one dataset worth of arguments
            in_fname, file_type, colspec, name, description, color, visibility = args[:7]
            del args[:7]
            color = color.replace('-', ',')
            track_line, is_interval = get_track_line_is_interval(
                file_type, name, description, color, visibility
            )
            out.write(track_line)
            with open(in_fname) as in_file:
                if not is_interval:
                    # Copy the dataset through unchanged, one chunk at a time.
                    while True:
                        chunk = in_file.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        out.write(chunk)
                else:
                    # Assume type is interval (don't pass this script anything else!)
                    columns = _parse_interval_columns(colspec)
                    skipped, first_bad = _write_interval_rows(in_file, out, columns)
                    skipped_lines += skipped
                    if not first_invalid_line:
                        first_invalid_line = first_bad
            out.write("\n")  # separating newline
            num_tracks += 1

    print(f"Generated a custom track containing {num_tracks} subtracks.")
    if skipped_lines:
        print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")


if __name__ == "__main__":
    main()