#!/usr/bin/env python
"""Summarize a windows BED dataset as per-chromosome window index ranges.

Reads the tab-separated dataset given by --input, takes the first column of
each data line as the chromosome name, and writes one line per chromosome to
--output in the form "<chrom> <start> <end>".
"""
import argparse
import collections


def chrom_window_ranges(lines):
    """Map each chromosome to the [start, end] window indexes it covers.

    Args:
        lines: Iterable of text lines. Blank lines and lines starting with
            '#' are skipped; on every other line the text before the first
            tab is used as the chromosome name.

    Returns:
        An OrderedDict keyed by chromosome name in order of first
        appearance. Each value is a two-item list [start, end], where start
        is the 0-based index of the chromosome's first window and end is
        start plus the number of windows seen for that chromosome.
    """
    ranges = collections.OrderedDict()
    # Index windows (data lines) rather than physical lines: skipped blank
    # and comment lines must not shift the start of later chromosomes,
    # because the end of every range is advanced once per data line only.
    window_index = 0
    for raw_line in lines:
        record = raw_line.strip()
        if not record or record.startswith('#'):
            # Skip blank lines and comments.
            continue
        chrom = record.split('\t')[0]
        if chrom in ranges:
            # Another window on a known chromosome: extend its end by 1.
            ranges[chrom][1] += 1
        else:
            # First window seen for this chromosome.
            ranges[chrom] = [window_index, window_index + 1]
        window_index += 1
    return ranges


def main():
    """Parse the command line, read the input and write the ranges."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', dest='input', help='Input bed dataset')
    parser.add_argument('--output', dest='output', help='Output window positions by chromosome dataset')

    args = parser.parse_args()

    with open(args.input, 'r') as fh:
        chroms = chrom_window_ranges(fh)

    with open(args.output, 'w') as fh:
        for chrom, (start, end) in chroms.items():
            fh.write('%s %d %d\n' % (chrom, start, end))


if __name__ == '__main__':
    main()