annotate create_window_positions_by_chrom.py @ 125:5e545c9030a0 draft

Uploaded
author greg
date Tue, 21 Nov 2017 14:39:31 -0500
parents 7a5b618675a6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
124
7a5b618675a6 Uploaded
greg
parents:
diff changeset
#!/usr/bin/env python
"""Write the range of window indexes covered by each chromosome in a bed dataset.

Every non-blank, non-comment line of the input is treated as one window whose
first tab-separated column is the chromosome name.  Windows are numbered from
zero in the order they appear, and one line per chromosome is written to the
output in the form ``<chrom> <start> <end>``, where ``start`` is the index of
the chromosome's first window and ``end`` is ``start`` plus the number of
windows seen for that chromosome.
"""
import argparse
import collections


def get_window_positions_by_chrom(lines):
    """Return an ordered mapping of chromosome -> [start, end] window indexes.

    ``lines`` is any iterable of text lines (an open file works).  Blank lines
    and lines starting with ``#`` are skipped and do NOT consume a window
    index, so the resulting ranges stay contiguous even when the input
    contains headers or comments.  Chromosomes are returned in the order in
    which they are first seen.
    """
    chroms = collections.OrderedDict()
    # Index of the next window; advanced only for real data lines so that
    # skipped lines cannot shift a chromosome's start or open gaps between
    # one chromosome's end and the next one's start.
    window_index = 0
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            # Skip blank lines and comments.
            continue
        chrom = line.split('\t')[0]
        if chrom in chroms:
            # Another window for a known chromosome: extend its end by 1.
            chroms[chrom][1] += 1
        else:
            # First window for this chromosome.
            chroms[chrom] = [window_index, window_index + 1]
        window_index += 1
    return chroms


def write_window_positions(chroms, fh):
    """Write one ``<chrom> <start> <end>`` line per entry of ``chroms`` to ``fh``."""
    for chrom, (start, end) in chroms.items():
        fh.write('%s %d %d\n' % (chrom, start, end))


def main(argv=None):
    """Parse the command line (``sys.argv`` when ``argv`` is None) and run."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', dest='input', help='Input bed dataset')
    parser.add_argument('--output', dest='output', help='Output window positions by chromosome dataset')
    args = parser.parse_args(argv)

    with open(args.input, 'r') as fh:
        chroms = get_window_positions_by_chrom(fh)

    with open(args.output, 'w') as fh:
        write_window_positions(chroms, fh)


if __name__ == '__main__':
    main()