Mercurial > repos > greg > genetrack
comparison genetrack_util.py @ 7:a7da50a23270 draft
Uploaded
author | greg |
---|---|
date | Tue, 24 Nov 2015 08:14:42 -0500 |
parents | a952b6740fb9 |
children |
comparison
equal
deleted
inserted
replaced
6:fa85ca6c9cf8 | 7:a7da50a23270 |
---|---|
7 import tempfile | 7 import tempfile |
8 | 8 |
9 GFF_EXT = 'gff' | 9 GFF_EXT = 'gff' |
10 SCIDX_EXT = 'scidx' | 10 SCIDX_EXT = 'scidx' |
11 | 11 |
12 ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', | |
13 'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX', | |
14 'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX', | |
15 'XXX'] | |
16 | |
17 | 12 |
18 def noop(data): | 13 def noop(data): |
19 return data | 14 return data |
20 | 15 |
21 | 16 |
25 | 20 |
26 def numeric_to_zeropad(data): | 21 def numeric_to_zeropad(data): |
27 return re.sub(r'chr(\d([^\d]|$))', r'chr0\1', data) | 22 return re.sub(r'chr(\d([^\d]|$))', r'chr0\1', data) |
28 | 23 |
29 | 24 |
30 def roman_to_numeric(data): | 25 FORMATS = ['zeropad', 'numeric'] |
31 def convert(match): | 26 IN_CONVERT = {'zeropad': zeropad_to_numeric, 'numeric': noop} |
32 """ | 27 OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'numeric': noop} |
33 Converts a single roman numeral to a number | |
34 """ | |
35 numeral = match.group(1) | |
36 numeral = numeral.upper() | |
37 if numeral not in ROMAN: | |
38 # Unable to convert detected Roman numeral | |
39 return match.group(0) | |
40 return 'chr'+str(ROMAN.index(numeral))+(match.group(2) or '') | |
41 r = re.compile('chr([IVX]+)([^IVX]|$)', flags=re.IGNORECASE) | |
42 data = r.sub(convert, data) | |
43 return data | |
44 | |
45 | |
46 def numeric_to_roman(data): | |
47 def convert(match): | |
48 """ | |
49 Converts a number to a roman numeral | |
50 """ | |
51 number = int(match.group(1)) | |
52 if number >= len(ROMAN): | |
53 # Number is out of range to convert to a Roman numeral | |
54 return match.group(0) | |
55 return 'chr'+ROMAN[number]+(match.group(2) or '') | |
56 r = re.compile('chr(\d+)([^\d]|$)') | |
57 data = r.sub(convert, data) | |
58 return data | |
59 | |
60 FORMATS = ['zeropad', 'numeric', 'roman'] | |
61 IN_CONVERT = {'zeropad': zeropad_to_numeric, 'roman': roman_to_numeric, 'numeric': noop} | |
62 OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'roman': numeric_to_roman, 'numeric': noop} | |
63 | 28 |
64 | 29 |
65 def conversion_functions(in_fmt, out_fmt): | 30 def conversion_functions(in_fmt, out_fmt): |
66 """ | 31 """ |
67 Returns the proper list of functions to apply to perform a conversion | 32 Returns the proper list of functions to apply to perform a conversion |
68 """ | 33 """ |
69 return [IN_CONVERT[in_fmt], OUT_CONVERT[out_fmt]] | 34 return [IN_CONVERT[in_fmt], OUT_CONVERT[out_fmt]] |
70 | 35 |
71 | 36 |
72 def autodetect_format(data): | |
73 if re.search('chr0\d', data): | |
74 fmt = 'zeropad' | |
75 elif re.search('chr[IVXivx]', data): | |
76 fmt = 'roman' | |
77 else: | |
78 fmt = 'numeric' | |
79 return fmt | |
80 | |
81 | |
82 def convert_data(data, in_fmt, out_fmt): | 37 def convert_data(data, in_fmt, out_fmt): |
83 if in_fmt == 'autodetect': | |
84 in_fmt = autodetect_format(data) | |
85 for fn in conversion_functions(in_fmt, out_fmt): | 38 for fn in conversion_functions(in_fmt, out_fmt): |
86 data = fn(data) | 39 data = fn(data) |
87 return data | 40 return data |
88 | 41 |
89 | 42 |
423 for peak in reverse_peaks: | 376 for peak in reverse_peaks: |
424 if process_bounds[0] < peak.index < process_bounds[1]: | 377 if process_bounds[0] < peak.index < process_bounds[1]: |
425 write(cname, '-', peak) | 378 write(cname, '-', peak) |
426 | 379 |
427 | 380 |
428 def sort_chromosome_reads_by_index( input_path ): | 381 def sort_chromosome_reads_by_index(input_path): |
429 """ | 382 """ |
430 Return a gff file with chromosome reads sorted by index. | 383 Return a gff file with chromosome reads sorted by index. |
431 """ | 384 """ |
432 # Will this sort produce different results across platforms? | 385 # Will this sort produce different results across platforms? |
433 output_path = tempfile.NamedTemporaryFile( delete=False ).name | 386 output_path = tempfile.NamedTemporaryFile(delete=False).name |
434 command = 'sort -k 1,1 -k 4,4n "%s" > "%s"' % (input_path, output_path) | 387 command = 'sort -k 1,1 -k 4,4n "%s" > "%s"' % (input_path, output_path) |
435 p = subprocess.Popen(command, shell=True) | 388 p = subprocess.Popen(command, shell=True) |
436 p.wait() | 389 p.wait() |
437 return output_path | 390 return output_path |