Mercurial > repos > greg > genetrack
comparison genetrack.py @ 10:1a9f1a4fa36c draft
Uploaded
author | greg |
---|---|
date | Wed, 02 Dec 2015 16:14:58 -0500 |
parents | 551630d1fae3 |
children | 6ad44f393892 |
comparison
equal
deleted
inserted
replaced
9:e10c1ddd440e | 10:1a9f1a4fa36c |
---|---|
7 import optparse | 7 import optparse |
8 import csv | 8 import csv |
9 import os | 9 import os |
10 import genetrack_util | 10 import genetrack_util |
11 | 11 |
12 CHUNK_SIZE = 10000000 | |
13 | |
12 | 14 |
13 if __name__ == '__main__': | 15 if __name__ == '__main__': |
14 parser = optparse.OptionParser() | 16 parser = optparse.OptionParser() |
15 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') | 17 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') |
16 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') | 18 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') |
17 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') | 19 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') |
18 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') | 20 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') |
19 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') | 21 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') |
20 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') | 22 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') |
21 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') | 23 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') |
22 parser.add_option('-c', '--chunk_size', dest='chunk_size', type='int', default=10, help='Size, in millions of base pairs.') | |
23 options, args = parser.parse_args() | 24 options, args = parser.parse_args() |
24 | 25 |
25 os.mkdir('output') | 26 os.mkdir('output') |
26 for (dataset_path, hid) in options.inputs: | 27 for (dataset_path, hid) in options.inputs: |
27 if options.input_format == 'gff': | 28 if options.input_format == 'gff': |
37 str(options.filter), | 38 str(options.filter), |
38 str(hid)) | 39 str(hid)) |
39 output_path = os.path.join('output', output_name) | 40 output_path = os.path.join('output', output_name) |
40 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') | 41 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') |
41 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') | 42 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') |
42 chunk_size = options.chunk_size * 10 ** 6 | |
43 width = options.sigma * 5 | 43 width = options.sigma * 5 |
44 manager = genetrack_util.ChromosomeManager(reader) | 44 manager = genetrack_util.ChromosomeManager(reader) |
45 while not manager.done: | 45 while not manager.done: |
46 cname = manager.chromosome_name() | 46 cname = manager.chromosome_name() |
47 # Should we process this chromosome? | 47 # Should we process this chromosome? |
48 data = manager.load_chromosome() | 48 data = manager.load_chromosome() |
49 if not data: | 49 if not data: |
50 continue | 50 continue |
51 keys = genetrack_util.make_keys(data) | 51 keys = genetrack_util.make_keys(data) |
52 lo, hi = genetrack_util.get_range(data) | 52 lo, hi = genetrack_util.get_range(data) |
53 for chunk in genetrack_util.get_chunks(lo, hi, size=chunk_size, overlap=width): | 53 for chunk in genetrack_util.get_chunks(lo, hi, size=CHUNK_SIZE, overlap=width): |
54 (slice_start, slice_end), process_bounds = chunk | 54 (slice_start, slice_end), process_bounds = chunk |
55 window = genetrack_util.get_window(data, slice_start, slice_end, keys) | 55 window = genetrack_util.get_window(data, slice_start, slice_end, keys) |
56 genetrack_util.process_chromosome(cname, | 56 genetrack_util.process_chromosome(cname, |
57 window, | 57 window, |
58 writer, | 58 writer, |