annotate genetrack.py @ 5:5aeb86949eae draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
author iuc
date Wed, 05 Jul 2017 11:56:37 -0400
parents 4644c2eee4e5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
1 """
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
2 genetrack.py
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
3
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
4 Input: either scidx or gff format of reads
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
5 Output: Called peaks in gff format
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
6 """
4
4644c2eee4e5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents: 0
diff changeset
7 import csv
0
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
8 import optparse
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
9 import os
4
4644c2eee4e5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents: 0
diff changeset
10
0
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
11 import genetrack_util
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
12
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
13 CHUNK_SIZE = 10000000
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
14
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
15
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
16 if __name__ == '__main__':
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
17 parser = optparse.OptionParser()
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
18 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
19 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
20 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
21 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
22 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
23 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
24 parser.add_option('-f', '--filter', dest='filter', type='int', default=1, help='Absolute read filter.')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
25 options, args = parser.parse_args()
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
26
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
27 os.mkdir('output')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
28 for (dataset_path, hid) in options.inputs:
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
29 if options.input_format == 'gff':
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
30 # Make sure the reads for each chromosome are sorted by index.
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
31 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
32 else:
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
33 # We're processing scidx data.
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
34 input_path = dataset_path
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
35 output_name = 's%se%su%sd%sF%s_on_data_%s' % (options.sigma,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
36 options.exclusion,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
37 options.up_width,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
38 options.down_width,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
39 options.filter,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
40 hid)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
41 output_path = os.path.join('output', output_name)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
42 reader = csv.reader(open(input_path, 'rU'), delimiter='\t')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
43 writer = csv.writer(open(output_path, 'wt'), delimiter='\t')
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
44 width = options.sigma * 5
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
45 manager = genetrack_util.ChromosomeManager(reader)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
46 while not manager.done:
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
47 cname = manager.chromosome_name()
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
48 # Should we process this chromosome?
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
49 data = manager.load_chromosome()
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
50 if not data:
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
51 continue
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
52 keys = genetrack_util.make_keys(data)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
53 lo, hi = genetrack_util.get_range(data)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
54 for chunk in genetrack_util.get_chunks(lo, hi, size=CHUNK_SIZE, overlap=width):
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
55 (slice_start, slice_end), process_bounds = chunk
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
56 window = genetrack_util.get_window(data, slice_start, slice_end, keys)
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
57 genetrack_util.process_chromosome(cname,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
58 window,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
59 writer,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
60 process_bounds,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
61 width,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
62 options.sigma,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
63 options.up_width,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
64 options.down_width,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
65 options.exclusion,
a387a9e49dd6 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
66 options.filter)