#!/usr/bin/env python
# Recovered from an HG changeset patch that added this file as
# create_window_positions_by_chrom.py:
#   User      greg
#   Date      1511293163 18000 (Tue Nov 21 14:39:23 2017 -0500)
#   Node ID   7a5b618675a63856d15e56ff8fdc556ce89144e8
#   Parent    e2995f2f127f6464bcc16db3b9008a0ea131c3ca
"""Summarize a tab-separated window file as per-chromosome window ranges.

Every non-blank, non-comment input line is treated as one window whose
first tab-separated field is the chromosome name.  For each chromosome the
script writes one line holding the chromosome name, the index of its first
window and the index one past its last window.
"""
import argparse
import collections


def get_window_positions_by_chrom(lines):
    """Return an ordered mapping of chromosome -> [start, end] window indexes.

    ``lines`` is any iterable of text lines (an open file works).  Blank
    lines and lines starting with ``#`` are ignored and do NOT consume a
    window index, so a header comment no longer shifts the reported
    positions.  ``start`` is the 0-based index of the first window seen for
    the chromosome and ``end`` is ``start`` plus the number of windows seen
    for it; chromosomes are kept in order of first appearance.

    The range is only an exact half-open window interval when all lines of
    a chromosome are contiguous in the input; a chromosome that reappears
    later still just has its ``end`` incremented.
    """
    chroms = collections.OrderedDict()
    # Index of the current data line (window); skipped lines are not counted.
    window = 0
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            # Skip blank lines and comments.
            continue
        chrom = line.split('\t')[0]
        if chrom in chroms:
            # Another window on a known chromosome: extend its end by 1.
            chroms[chrom][1] += 1
        else:
            # First window on this chromosome (covers the very first window
            # of the file too, which yields [0, 1]).
            chroms[chrom] = [window, window + 1]
        window += 1
    return chroms


def write_window_positions(chroms, fh):
    """Write one ``chrom start end`` line per entry of ``chroms`` to ``fh``."""
    # NOTE(review): the separator is reproduced as single spaces, exactly as
    # it appears in the recovered patch text - confirm whether tabs were
    # intended before changing it.
    for chrom, positions in chroms.items():
        fh.write('%s %d %d\n' % (chrom, positions[0], positions[1]))


def main(argv=None):
    """Parse ``--input`` / ``--output`` and convert the input file.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse's own default) so the
    command line interface is the same as before.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', dest='input', help='Input bed dataset')
    parser.add_argument('--output', dest='output', help='Output window positions by chromosome dataset')
    args = parser.parse_args(argv)

    # Read everything first so the output file is only created once the
    # input has been processed successfully.
    with open(args.input, 'r') as fh:
        chroms = get_window_positions_by_chrom(fh)

    with open(args.output, 'w') as fh:
        write_window_positions(chroms, fh)


if __name__ == '__main__':
    main()