Mercurial > repos > greg > genetrack
comparison genetrack.py @ 2:fcc2f5992551 draft
Uploaded
author | greg |
---|---|
date | Wed, 18 Nov 2015 08:12:26 -0500 |
parents | 0368815ae4d5 |
children | 551630d1fae3 |
comparison
equal
deleted
inserted
replaced
1:31cc73d7c234 | 2:fcc2f5992551 |
---|---|
7 | 7 |
8 Output: Called peaks in either gff or txt format | 8 Output: Called peaks in either gff or txt format |
9 .txt format: tab-separated chromosome, strand, start, end, read count | 9 .txt format: tab-separated chromosome, strand, start, end, read count |
10 .gff format: standard gff, score is read count | 10 .gff format: standard gff, score is read count |
11 """ | 11 """ |
12 import argparse | 12 import optparse |
13 import csv | 13 import csv |
14 import os | 14 import os |
15 import genetrack_util | 15 import genetrack_util |
16 | 16 |
17 | 17 |
18 if __name__ == '__main__': | 18 if __name__ == '__main__': |
19 parser = argparse.ArgumentParser() | 19 parser = optparse.OptionParser() |
20 parser.add_argument('--input_format', dest='input_format', help='Input format.') | 20 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') |
21 parser.add_argument('--input', dest='inputs', action='append', nargs=2, help="Input datasets") | 21 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') |
22 parser.add_argument('--sigma', dest='sigma', type=int, default=5, help='Sigma.') | 22 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') |
23 parser.add_argument('--exclusion', dest='exclusion', type=int, default=20, help='Exclusion zone.') | 23 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') |
24 parser.add_argument('--up_width', dest='up_width', type=int, default=10, help='Upstream width of called peaks.') | 24 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') |
25 parser.add_argument('--down_width', dest='down_width', type=int, default=10, help='Downstream width of called peaks.') | 25 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') |
26 parser.add_argument('--filter', dest='filter', type=int, default=3, help='Absolute read filter.') | 26 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') |
27 parser.add_argument('--chunk_size', dest='chunk_size', type=int, default=10, help='Size, in millions of base pairs.') | 27 parser.add_option('-c', '--chunk_size', dest='chunk_size', type='int', default=10, help='Size, in millions of base pairs.') |
28 args = parser.parse_args() | 28 options, args = parser.parse_args() |
29 | 29 |
30 os.mkdir('output') | 30 os.mkdir('output') |
31 for (dataset_path, hid) in args.inputs: | 31 for (dataset_path, hid) in options.inputs: |
32 if args.input_format == 'gff': | 32 if options.input_format == 'gff': |
33 # Make sure the reads for each chromosome are sorted by index. | 33 # Make sure the reads for each chromosome are sorted by index. |
34 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path) | 34 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path) |
35 else: | 35 else: |
36 # We're processing ssccidx data. | 36 # We're processing ssccidx data. |
37 input_path = dataset_path | 37 input_path = dataset_path |
38 output_name = 's%se%su%sd%sF%s_on_data_%s' % (str(args.sigma), | 38 output_name = 's%se%su%sd%sF%s_on_data_%s' % (str(options.sigma), |
39 str(args.exclusion), | 39 str(options.exclusion), |
40 str(args.up_width), | 40 str(options.up_width), |
41 str(args.down_width), | 41 str(options.down_width), |
42 str(args.filter), | 42 str(options.filter), |
43 str(hid)) | 43 str(hid)) |
44 output_path = os.path.join('output', output_name) | 44 output_path = os.path.join('output', output_name) |
45 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') | 45 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') |
46 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') | 46 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') |
47 chunk_size = args.chunk_size * 10 ** 6 | 47 chunk_size = options.chunk_size * 10 ** 6 |
48 width = args.sigma * 5 | 48 width = options.sigma * 5 |
49 manager = genetrack_util.ChromosomeManager(reader) | 49 manager = genetrack_util.ChromosomeManager(reader) |
50 while not manager.done: | 50 while not manager.done: |
51 cname = manager.chromosome_name() | 51 cname = manager.chromosome_name() |
52 # Should we process this chromosome? | 52 # Should we process this chromosome? |
53 data = manager.load_chromosome() | 53 data = manager.load_chromosome() |
60 window = genetrack_util.get_window(data, slice_start, slice_end, keys) | 60 window = genetrack_util.get_window(data, slice_start, slice_end, keys) |
61 genetrack_util.process_chromosome(cname, | 61 genetrack_util.process_chromosome(cname, |
62 window, | 62 window, |
63 writer, | 63 writer, |
64 process_bounds, | 64 process_bounds, |
65 args.width, | 65 width, |
66 args.sigma, | 66 options.sigma, |
67 args.up_width, | 67 options.up_width, |
68 args.down_width, | 68 options.down_width, |
69 args.exclusion, | 69 options.exclusion, |
70 args.filter) | 70 options.filter) |