Mercurial > repos > greg > genetrack
changeset 7:a7da50a23270 draft
Uploaded
author | greg |
---|---|
date | Tue, 24 Nov 2015 08:14:42 -0500 |
parents | fa85ca6c9cf8 |
children | e80af9b30cbb |
files | genetrack_util.py |
diffstat | 1 files changed, 5 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/genetrack_util.py Sat Nov 21 08:57:37 2015 -0500
+++ b/genetrack_util.py Tue Nov 24 08:14:42 2015 -0500
@@ -9,11 +9,6 @@
 GFF_EXT = 'gff'
 SCIDX_EXT = 'scidx'
 
-ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
-         'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX',
-         'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX',
-         'XXX']
-
 
 def noop(data):
     return data
@@ -27,39 +22,9 @@
     return re.sub(r'chr(\d([^\d]|$))', r'chr0\1', data)
 
 
-def roman_to_numeric(data):
-    def convert(match):
-        """
-        Converts a single roman numeral to a number
-        """
-        numeral = match.group(1)
-        numeral = numeral.upper()
-        if numeral not in ROMAN:
-            # Unable to convert detected Roman numeral
-            return match.group(0)
-        return 'chr'+str(ROMAN.index(numeral))+(match.group(2) or '')
-    r = re.compile('chr([IVX]+)([^IVX]|$)', flags=re.IGNORECASE)
-    data = r.sub(convert, data)
-    return data
-
-
-def numeric_to_roman(data):
-    def convert(match):
-        """
-        Converts a number to a roman numeral
-        """
-        number = int(match.group(1))
-        if number >= len(ROMAN):
-            # Number is out of range to convert to a Roman numeral
-            return match.group(0)
-        return 'chr'+ROMAN[number]+(match.group(2) or '')
-    r = re.compile('chr(\d+)([^\d]|$)')
-    data = r.sub(convert, data)
-    return data
-
-FORMATS = ['zeropad', 'numeric', 'roman']
-IN_CONVERT = {'zeropad': zeropad_to_numeric, 'roman': roman_to_numeric, 'numeric': noop}
-OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'roman': numeric_to_roman, 'numeric': noop}
+FORMATS = ['zeropad', 'numeric']
+IN_CONVERT = {'zeropad': zeropad_to_numeric, 'numeric': noop}
+OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'numeric': noop}
 
 
 def conversion_functions(in_fmt, out_fmt):
@@ -69,19 +34,7 @@
     return [IN_CONVERT[in_fmt], OUT_CONVERT[out_fmt]]
 
 
-def autodetect_format(data):
-    if re.search('chr0\d', data):
-        fmt = 'zeropad'
-    elif re.search('chr[IVXivx]', data):
-        fmt = 'roman'
-    else:
-        fmt = 'numeric'
-    return fmt
-
-
 def convert_data(data, in_fmt, out_fmt):
-    if in_fmt == 'autodetect':
-        in_fmt = autodetect_format(data)
     for fn in conversion_functions(in_fmt, out_fmt):
         data = fn(data)
     return data
@@ -425,12 +378,12 @@
         write(cname, '-', peak)
 
 
-def sort_chromosome_reads_by_index( input_path ):
+def sort_chromosome_reads_by_index(input_path):
     """
     Return a gff file with chromosome reads sorted by index.
     """
     # Will this sort produce different results across platforms?
-    output_path = tempfile.NamedTemporaryFile( delete=False ).name
+    output_path = tempfile.NamedTemporaryFile(delete=False).name
     command = 'sort -k 1,1 -k 4,4n "%s" > "%s"' % (input_path, output_path)
     p = subprocess.Popen(command, shell=True)
     p.wait()