comparison genetrack.py @ 2:fcc2f5992551 draft

Uploaded
author greg
date Wed, 18 Nov 2015 08:12:26 -0500
parents 0368815ae4d5
children 551630d1fae3
comparison
equal deleted inserted replaced
1:31cc73d7c234 2:fcc2f5992551
7 7
8 Output: Called peaks in either gff or txt format 8 Output: Called peaks in either gff or txt format
9 .txt format: tab-separated chromosome, strand, start, end, read count 9 .txt format: tab-separated chromosome, strand, start, end, read count
10 .gff format: standard gff, score is read count 10 .gff format: standard gff, score is read count
11 """ 11 """
12 import argparse 12 import optparse
13 import csv 13 import csv
14 import os 14 import os
15 import genetrack_util 15 import genetrack_util
16 16
17 17
18 if __name__ == '__main__': 18 if __name__ == '__main__':
19 parser = argparse.ArgumentParser() 19 parser = optparse.OptionParser()
20 parser.add_argument('--input_format', dest='input_format', help='Input format.') 20 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format')
21 parser.add_argument('--input', dest='inputs', action='append', nargs=2, help="Input datasets") 21 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets')
22 parser.add_argument('--sigma', dest='sigma', type=int, default=5, help='Sigma.') 22 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.')
23 parser.add_argument('--exclusion', dest='exclusion', type=int, default=20, help='Exclusion zone.') 23 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.')
24 parser.add_argument('--up_width', dest='up_width', type=int, default=10, help='Upstream width of called peaks.') 24 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.')
25 parser.add_argument('--down_width', dest='down_width', type=int, default=10, help='Downstream width of called peaks.') 25 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.')
26 parser.add_argument('--filter', dest='filter', type=int, default=3, help='Absolute read filter.') 26 parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.')
27 parser.add_argument('--chunk_size', dest='chunk_size', type=int, default=10, help='Size, in millions of base pairs.') 27 parser.add_option('-c', '--chunk_size', dest='chunk_size', type='int', default=10, help='Size, in millions of base pairs.')
28 args = parser.parse_args() 28 options, args = parser.parse_args()
29 29
30 os.mkdir('output') 30 os.mkdir('output')
31 for (dataset_path, hid) in args.inputs: 31 for (dataset_path, hid) in options.inputs:
32 if args.input_format == 'gff': 32 if options.input_format == 'gff':
33 # Make sure the reads for each chromosome are sorted by index. 33 # Make sure the reads for each chromosome are sorted by index.
34 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path) 34 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path)
35 else: 35 else:
36 # We're processing ssccidx data. 36 # We're processing ssccidx data.
37 input_path = dataset_path 37 input_path = dataset_path
38 output_name = 's%se%su%sd%sF%s_on_data_%s' % (str(args.sigma), 38 output_name = 's%se%su%sd%sF%s_on_data_%s' % (str(options.sigma),
39 str(args.exclusion), 39 str(options.exclusion),
40 str(args.up_width), 40 str(options.up_width),
41 str(args.down_width), 41 str(options.down_width),
42 str(args.filter), 42 str(options.filter),
43 str(hid)) 43 str(hid))
44 output_path = os.path.join('output', output_name) 44 output_path = os.path.join('output', output_name)
45 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') 45 reader = csv.reader(open(input_path, 'rU'), delimiter='\t')
46 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') 46 writer = csv.writer(open(output_path, 'wt'), delimiter='\t')
47 chunk_size = args.chunk_size * 10 ** 6 47 chunk_size = options.chunk_size * 10 ** 6
48 width = args.sigma * 5 48 width = options.sigma * 5
49 manager = genetrack_util.ChromosomeManager(reader) 49 manager = genetrack_util.ChromosomeManager(reader)
50 while not manager.done: 50 while not manager.done:
51 cname = manager.chromosome_name() 51 cname = manager.chromosome_name()
52 # Should we process this chromosome? 52 # Should we process this chromosome?
53 data = manager.load_chromosome() 53 data = manager.load_chromosome()
60 window = genetrack_util.get_window(data, slice_start, slice_end, keys) 60 window = genetrack_util.get_window(data, slice_start, slice_end, keys)
61 genetrack_util.process_chromosome(cname, 61 genetrack_util.process_chromosome(cname,
62 window, 62 window,
63 writer, 63 writer,
64 process_bounds, 64 process_bounds,
65 args.width, 65 width,
66 args.sigma, 66 options.sigma,
67 args.up_width, 67 options.up_width,
68 args.down_width, 68 options.down_width,
69 args.exclusion, 69 options.exclusion,
70 args.filter) 70 options.filter)