Mercurial > repos > greg > genetrack
comparison genetrack.py @ 10:1a9f1a4fa36c draft
Uploaded
| author | greg |
|---|---|
| date | Wed, 02 Dec 2015 16:14:58 -0500 |
| parents | 551630d1fae3 |
| children | 6ad44f393892 |
comparison
equal
deleted
inserted
replaced
| 9:e10c1ddd440e | 10:1a9f1a4fa36c |
|---|---|
| 7 import optparse | 7 import optparse |
| 8 import csv | 8 import csv |
| 9 import os | 9 import os |
| 10 import genetrack_util | 10 import genetrack_util |
| 11 | 11 |
| 12 CHUNK_SIZE = 10000000 | |
| 13 | |
| 12 | 14 |
| 13 if __name__ == '__main__': | 15 if __name__ == '__main__': |
| 14 parser = optparse.OptionParser() | 16 parser = optparse.OptionParser() |
| 15 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') | 17 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') |
| 16 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') | 18 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') |
| 17 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') | 19 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') |
| 18 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') | 20 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') |
| 19 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') | 21 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') |
| 20 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') | 22 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') |
| 21 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') | 23 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') |
| 22 parser.add_option('-c', '--chunk_size', dest='chunk_size', type='int', default=10, help='Size, in millions of base pairs.') | |
| 23 options, args = parser.parse_args() | 24 options, args = parser.parse_args() |
| 24 | 25 |
| 25 os.mkdir('output') | 26 os.mkdir('output') |
| 26 for (dataset_path, hid) in options.inputs: | 27 for (dataset_path, hid) in options.inputs: |
| 27 if options.input_format == 'gff': | 28 if options.input_format == 'gff': |
| 37 str(options.filter), | 38 str(options.filter), |
| 38 str(hid)) | 39 str(hid)) |
| 39 output_path = os.path.join('output', output_name) | 40 output_path = os.path.join('output', output_name) |
| 40 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') | 41 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') |
| 41 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') | 42 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') |
| 42 chunk_size = options.chunk_size * 10 ** 6 | |
| 43 width = options.sigma * 5 | 43 width = options.sigma * 5 |
| 44 manager = genetrack_util.ChromosomeManager(reader) | 44 manager = genetrack_util.ChromosomeManager(reader) |
| 45 while not manager.done: | 45 while not manager.done: |
| 46 cname = manager.chromosome_name() | 46 cname = manager.chromosome_name() |
| 47 # Should we process this chromosome? | 47 # Should we process this chromosome? |
| 48 data = manager.load_chromosome() | 48 data = manager.load_chromosome() |
| 49 if not data: | 49 if not data: |
| 50 continue | 50 continue |
| 51 keys = genetrack_util.make_keys(data) | 51 keys = genetrack_util.make_keys(data) |
| 52 lo, hi = genetrack_util.get_range(data) | 52 lo, hi = genetrack_util.get_range(data) |
| 53 for chunk in genetrack_util.get_chunks(lo, hi, size=chunk_size, overlap=width): | 53 for chunk in genetrack_util.get_chunks(lo, hi, size=CHUNK_SIZE, overlap=width): |
| 54 (slice_start, slice_end), process_bounds = chunk | 54 (slice_start, slice_end), process_bounds = chunk |
| 55 window = genetrack_util.get_window(data, slice_start, slice_end, keys) | 55 window = genetrack_util.get_window(data, slice_start, slice_end, keys) |
| 56 genetrack_util.process_chromosome(cname, | 56 genetrack_util.process_chromosome(cname, |
| 57 window, | 57 window, |
| 58 writer, | 58 writer, |
