| 0 | 1 """ | 
|  | 2 genetrack.py | 
|  | 3 | 
|  | 4 Input: either ssccidx or gff format of reads | 
|  | 5 .ssccidx format: tab-separated chromosome (chr##), index, + reads, - reads | 
|  | 6 .gff format: standard gff, score interpreted as number of reads | 
|  | 7 | 
|  | 8 Output: Called peaks in either gff or txt format | 
|  | 9 .txt format: tab-separated chromosome, strand, start, end, read count | 
|  | 10 .gff format: standard gff, score is read count | 
|  | 11 """ | 
| 2 | 12 import optparse | 
| 0 | 13 import csv | 
|  | 14 import os | 
|  | 15 import genetrack_util | 
|  | 16 | 
|  | 17 | 
|  | 18 if __name__ == '__main__': | 
| 2 | 19     parser = optparse.OptionParser() | 
|  | 20     parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') | 
|  | 21     parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') | 
|  | 22     parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') | 
|  | 23     parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') | 
|  | 24     parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') | 
|  | 25     parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') | 
|  | 26     parser.add_option('-f', '--filter', dest='filter', type='int', default=3, help='Absolute read filter.') | 
|  | 27     parser.add_option('-c', '--chunk_size', dest='chunk_size', type='int', default=10, help='Size, in millions of base pairs.') | 
|  | 28     options, args = parser.parse_args() | 
| 0 | 29 | 
|  | 30     os.mkdir('output') | 
| 2 | 31     for (dataset_path, hid) in options.inputs: | 
|  | 32         if options.input_format == 'gff': | 
| 0 | 33             # Make sure the reads for each chromosome are sorted by index. | 
|  | 34             input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path) | 
|  | 35         else: | 
|  | 36             # We're processing ssccidx data. | 
|  | 37             input_path = dataset_path | 
| 2 | 38         output_name = 's%se%su%sd%sF%s_on_data_%s' % (str(options.sigma), | 
|  | 39                                                       str(options.exclusion), | 
|  | 40                                                       str(options.up_width), | 
|  | 41                                                       str(options.down_width), | 
|  | 42                                                       str(options.filter), | 
| 0 | 43                                                       str(hid)) | 
|  | 44         output_path = os.path.join('output', output_name) | 
|  | 45         reader = csv.reader(open(input_path, 'rU'), delimiter='\t') | 
|  | 46         writer = csv.writer(open(output_path, 'wt'), delimiter='\t') | 
| 2 | 47         chunk_size = options.chunk_size * 10 ** 6 | 
|  | 48         width = options.sigma * 5 | 
| 0 | 49         manager = genetrack_util.ChromosomeManager(reader) | 
|  | 50         while not manager.done: | 
|  | 51             cname = manager.chromosome_name() | 
|  | 52             # Should we process this chromosome? | 
|  | 53             data = manager.load_chromosome() | 
|  | 54             if not data: | 
|  | 55                 continue | 
|  | 56             keys = genetrack_util.make_keys(data) | 
|  | 57             lo, hi = genetrack_util.get_range(data) | 
|  | 58             for chunk in genetrack_util.get_chunks(lo, hi, size=chunk_size, overlap=width): | 
|  | 59                 (slice_start, slice_end), process_bounds = chunk | 
|  | 60                 window = genetrack_util.get_window(data, slice_start, slice_end, keys) | 
|  | 61                 genetrack_util.process_chromosome(cname, | 
|  | 62                                                   window, | 
|  | 63                                                   writer, | 
|  | 64                                                   process_bounds, | 
| 2 | 65                                                   width, | 
|  | 66                                                   options.sigma, | 
|  | 67                                                   options.up_width, | 
|  | 68                                                   options.down_width, | 
|  | 69                                                   options.exclusion, | 
|  | 70                                                   options.filter) |